diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 00a3ee1c..30cb93b2 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -201,7 +201,7 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c AUC = df %>% filter(conc_num_factor == 0) %>% pull(sd_AUC) %>% first() ) - interaction_scores <- df %>% + calculations <- df %>% mutate( WT_L = df$mean_L, WT_K = df$mean_K, @@ -229,20 +229,20 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c ) %>% ungroup() - interaction_scores <- interaction_scores %>% + calculations <- calculations %>% group_by(across(all_of(group_vars))) %>% mutate( Raw_Shift_L = mean_L[[1]] - bg_means$L, Raw_Shift_K = mean_K[[1]] - bg_means$K, Raw_Shift_r = mean_r[[1]] - bg_means$r, Raw_Shift_AUC = mean_AUC[[1]] - bg_means$AUC, - Z_Shift_L = Raw_Shift_L[[1]] / df$sd_L[[1]], - Z_Shift_K = Raw_Shift_K[[1]] / df$sd_K[[1]], - Z_Shift_r = Raw_Shift_r[[1]] / df$sd_r[[1]], - Z_Shift_AUC = Raw_Shift_AUC[[1]] / df$sd_AUC[[1]] + Z_Shift_L = Raw_Shift_L[[1]] / bg_sd$L, + Z_Shift_K = Raw_Shift_K[[1]] / bg_sd$K, + Z_Shift_r = Raw_Shift_r[[1]] / bg_sd$r, + Z_Shift_AUC = Raw_Shift_AUC[[1]] / bg_sd$AUC ) - interaction_scores <- interaction_scores %>% + calculations <- calculations %>% mutate( Exp_L = WT_L + Raw_Shift_L, Delta_L = mean_L - Exp_L, @@ -254,7 +254,7 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c Delta_AUC = mean_AUC - Exp_AUC ) - interaction_scores <- interaction_scores %>% + calculations <- calculations %>% mutate( Delta_L = if_else(NG == 1, mean_L - WT_L, Delta_L), Delta_K = if_else(NG == 1, mean_K - WT_K, Delta_K), @@ -263,8 +263,7 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L) ) - # Calculate linear models and interaction scores - interaction_scores <- interaction_scores %>% + interactions <- calculations %>% mutate( lm_L = lm(Delta_L ~ conc_num_factor), lm_K = lm(Delta_K ~ conc_num_factor), @@ -276,20 +275,8 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c Zscore_AUC = Delta_AUC / WT_sd_AUC ) - interaction_scores <- interaction_scores %>% + interactions <- interactions %>% mutate( - Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE), - Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE), - Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE), - Sum_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE) - ) - - interaction_scores_all <- interaction_scores %>% - mutate( - Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs, - Avg_Zscore_K = Sum_Zscore_K / num_non_removed_concs, - Avg_Zscore_r = Sum_Zscore_r / (total_conc_num - 1), - Avg_Zscore_AUC = Sum_Zscore_AUC / (total_conc_num - 1), lm_Score_L = max_conc * coef(lm_L)[2] + coef(lm_L)[1], lm_Score_K = max_conc * coef(lm_K)[2] + coef(lm_K)[1], lm_Score_r = max_conc * coef(lm_r)[2] + coef(lm_r)[1], @@ -297,25 +284,54 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c r_squared_L = summary(lm_L)$r.squared, r_squared_K = summary(lm_K)$r.squared, r_squared_r = summary(lm_r)$r.squared, - r_squared_AUC = summary(lm_AUC)$r.squared + r_squared_AUC = summary(lm_AUC)$r.squared, + Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE), + Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE), + Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE), + Sum_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE) ) - # Calculate Z_lm for each variable - interaction_scores_all <- interaction_scores_all %>% + interactions <- interactions %>% mutate( + Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs, + Avg_Zscore_K = Sum_Zscore_K / num_non_removed_concs, + Avg_Zscore_r = Sum_Zscore_r / (total_conc_num - 1), + Avg_Zscore_AUC = Sum_Zscore_AUC / (total_conc_num - 1), Z_lm_L = (lm_Score_L - mean(lm_Score_L, na.rm = TRUE)) / sd(lm_Score_L, na.rm = TRUE), Z_lm_K = (lm_Score_K - mean(lm_Score_K, na.rm = TRUE)) / sd(lm_Score_K, na.rm = TRUE), Z_lm_r = (lm_Score_r - mean(lm_Score_r, na.rm = TRUE)) / sd(lm_Score_r, na.rm = TRUE), Z_lm_AUC = (lm_Score_AUC - mean(lm_Score_AUC, na.rm = TRUE)) / sd(lm_Score_AUC, na.rm = TRUE) ) + calculations <- calculations %>% + select("OrfRep", "Gene", "num", "conc_num", "conc_num_factor", + "mean_L", "mean_K", "mean_r", "mean_AUC", + "median_L", "median_K", "median_r", "median_AUC", + "sd_L", "sd_K", "sd_r", "sd_AUC", + "se_L", "se_K", "se_r", "se_AUC", + "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC", + "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC", + "WT_L", "WT_K", "WT_r", "WT_AUC", "WT_sd_L", "WT_sd_K", "WT_sd_r", "WT_sd_AUC", + "Exp_L", "Exp_K", "Exp_r", "Exp_AUC", "Delta_L", "Delta_K", "Delta_r", "Delta_AUC", + "Zscore_L", "Zscore_K", "Zscore_r", "Zscore_AUC", + "NG", "SM", "DB") %>% + ungroup() + # Arrange results by Z_lm_L and NG - interaction_scores_all <- interaction_scores_all %>% + interactions <- interactions %>% + select("OrfRep", "Gene", "num", "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_AUC", "Raw_Shift_r", + "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC", + "lm_Score_L", "lm_Score_K", "lm_Score_AUC", "lm_Score_r", + "R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC", + "Sum_Z_Score_L", "Sum_Z_Score_K", "Sum_Z_Score_r", "Sum_Z_Score_AUC", + "Avg_Zscore_L", "Avg_Zscore_K", "Avg_Zscore_r", "Avg_Zscore_AUC", + "Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC", + "NG", "SM", "DB") %>% arrange(desc(lm_Score_L)) %>% arrange(desc(NG)) %>% ungroup() - return(list(zscores_calculations = interaction_scores_all, zscores_interactions = interaction_scores)) + return(list(calculations = calculations, interactions = interaction)) } generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_layout = NULL) { @@ -674,6 +690,7 @@ main <- function() { # Print quality control graphs before removing data due to contamination and # adjusting missing data to max theoretical values + message("Generating QC plot configurations") l_vs_k_plots <- list( list(df = df, x_var = "L", y_var = "K", plot_type = "scatter", title = "Raw L vs K before QC", @@ -864,10 +881,10 @@ main <- function() { print(head(deletion_strains)) deletion_results <- calculate_interaction_scores(deletion_strains, max_conc, variables) - zscores_calculations_reference <- reference_results$zscores_calculations - zscores_interactions_reference <- reference_results$zscores_interactions - zscores_calculations <- deletion_results$zscores_calculations - zscores_interactions <- deletion_results$zscores_interactions + zscores_calculations_reference <- reference_results$calculations + zscores_interactions_reference <- reference_results$interactions + zscores_calculations <- deletion_results$calculations + zscores_interactions <- deletion_results$interactions # Writing Z-Scores to file write.csv(zscores_calculations_reference, file = file.path(out_dir, "RF_ZScores_Calculations.csv"), row.names = FALSE) @@ -946,9 +963,20 @@ main <- function() { generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots_lm", plot_configs = rank_lm_plot_config, grid_layout = list(ncol = 3, nrow = 2)) - + interaction_scores_filtered + + + # lm_summaries <- lapply(lm_list, summary) + # correlation_plot_configs <- generate_correlation_plot_configs(zscores_interactions_filtered, lm_list, lm_summaries) + # generate_and_save_plots(zscores_interactions_filtered, output_dir, correlation_plot_configs) + }) + }) +} +main() + + # # Correlation plots # lm_list <- list( # lm(Z_lm_K ~ Z_lm_L, data = zscores_interactions_filtered), @@ -957,12 +985,4 @@ main <- function() { # lm(Z_lm_r ~ Z_lm_K, data = zscores_interactions_filtered), # lm(Z_lm_AUC ~ Z_lm_K, data = zscores_interactions_filtered), # lm(Z_lm_AUC ~ Z_lm_r, data = zscores_interactions_filtered) - # ) - - lm_summaries <- lapply(lm_list, summary) - correlation_plot_configs <- generate_correlation_plot_configs(zscores_interactions_filtered, lm_list, lm_summaries) - generate_and_save_plots(zscores_interactions_filtered, output_dir, correlation_plot_configs) - }) - }) -} -main() + # ) \ No newline at end of file