diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 98ca2cc3..39a2bf84 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -204,64 +204,59 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c AUC = df %>% filter(conc_num_factor == 0) %>% pull(sd_AUC) %>% first() ) + # Main statistics and shifts stats <- df %>% mutate( - WT_L = df$mean_L, - WT_K = df$mean_K, - WT_r = df$mean_r, - WT_AUC = df$mean_AUC, - WT_sd_L = df$sd_L, - WT_sd_K = df$sd_K, - WT_sd_r = df$sd_r, - WT_sd_AUC = df$sd_AUC + WT_L = mean_L, + WT_K = mean_K, + WT_r = mean_r, + WT_AUC = mean_AUC, + WT_sd_L = sd_L, + WT_sd_K = sd_K, + WT_sd_r = sd_r, + WT_sd_AUC = sd_AUC ) %>% group_by(across(all_of(group_vars)), conc_num, conc_num_factor) %>% - mutate( - N = sum(!is.na(L)), - NG = sum(NG, na.rm = TRUE), - DB = sum(DB, na.rm = TRUE), - SM = sum(SM, na.rm = TRUE), - across(all_of(variables), list( - mean = ~mean(., na.rm = TRUE), - median = ~median(., na.rm = TRUE), - max = ~ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)), - min = ~ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)), - sd = ~sd(., na.rm = TRUE), - se = ~ifelse(sum(!is.na(.)) > 1, sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)) - 1), NA) - ), .names = "{.fn}_{.col}") - ) %>% + mutate( + N = sum(!is.na(L)), + NG = sum(NG, na.rm = TRUE), + DB = sum(DB, na.rm = TRUE), + SM = sum(SM, na.rm = TRUE), + across(all_of(variables), list( + mean = ~mean(., na.rm = TRUE), + median = ~median(., na.rm = TRUE), + max = ~ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)), + min = ~ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)), + sd = ~sd(., na.rm = TRUE), + se = ~ifelse(sum(!is.na(.)) > 1, sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)) - 1), NA) + ), .names = "{.fn}_{.col}") + ) %>% ungroup() stats <- stats %>% group_by(across(all_of(group_vars))) %>% - mutate( - Raw_Shift_L = mean_L[[1]] - bg_means$L, - Raw_Shift_K = mean_K[[1]] - bg_means$K, - Raw_Shift_r = mean_r[[1]] - bg_means$r, - Raw_Shift_AUC = mean_AUC[[1]] - bg_means$AUC, - Z_Shift_L = Raw_Shift_L[[1]] / bg_sd$L, - Z_Shift_K = Raw_Shift_K[[1]] / bg_sd$K, - Z_Shift_r = Raw_Shift_r[[1]] / bg_sd$r, - Z_Shift_AUC = Raw_Shift_AUC[[1]] / bg_sd$AUC - ) + mutate( + Raw_Shift_L = mean_L - bg_means$L, + Raw_Shift_K = mean_K - bg_means$K, + Raw_Shift_r = mean_r - bg_means$r, + Raw_Shift_AUC = mean_AUC - bg_means$AUC, + Z_Shift_L = Raw_Shift_L / bg_sd$L, + Z_Shift_K = Raw_Shift_K / bg_sd$K, + Z_Shift_r = Raw_Shift_r / bg_sd$r, + Z_Shift_AUC = Raw_Shift_AUC / bg_sd$AUC + ) stats <- stats %>% mutate( Exp_L = WT_L + Raw_Shift_L, Exp_K = WT_K + Raw_Shift_K, Exp_r = WT_r + Raw_Shift_r, - Exp_AUC = WT_AUC + Raw_Shift_AUC - ) - - stats <- stats %>% - mutate( + Exp_AUC = WT_AUC + Raw_Shift_AUC, Delta_L = mean_L - Exp_L, Delta_K = mean_K - Exp_K, Delta_r = mean_r - Exp_r, Delta_AUC = mean_AUC - Exp_AUC - ) - - stats <- stats %>% + ) %>% mutate( Delta_L = if_else(NG == 1, mean_L - WT_L, Delta_L), Delta_K = if_else(NG == 1, mean_K - WT_K, Delta_K), @@ -270,39 +265,34 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L) ) - stats <- stats %>% - mutate( - Zscore_L = Delta_L / WT_sd_L, - Zscore_K = Delta_K / WT_sd_K, - Zscore_r = Delta_r / WT_sd_r, - Zscore_AUC = Delta_AUC / WT_sd_AUC - ) - + # Create linear models with proper error handling for insufficient data lms <- stats %>% summarise( - lm_L = list(lm(Delta_L ~ conc_num_factor)), - lm_K = list(lm(Delta_K ~ conc_num_factor)), - lm_r = list(lm(Delta_r ~ conc_num_factor)), - lm_AUC = list(lm(Delta_AUC ~ conc_num_factor)) + lm_L = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_L)) > 1) list(lm(Delta_L ~ conc_num_factor)) else NULL, + lm_K = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_K)) > 1) list(lm(Delta_K ~ conc_num_factor)) else NULL, + lm_r = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_r)) > 1) list(lm(Delta_r ~ conc_num_factor)) else NULL, + lm_AUC = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_AUC)) > 1) list(lm(Delta_AUC ~ conc_num_factor)) else NULL ) + # Join models and calculate interaction scores stats <- stats %>% left_join(lms, by = group_vars) %>% mutate( - lm_Score_L = sapply(lm_L, function(model) coef(model)[2] * max_conc + coef(model)[1]), - lm_Score_K = sapply(lm_K, function(model) coef(model)[2] * max_conc + coef(model)[1]), - lm_Score_r = sapply(lm_r, function(model) coef(model)[2] * max_conc + coef(model)[1]), - lm_Score_AUC = sapply(lm_AUC, function(model) coef(model)[2] * max_conc + coef(model)[1]), - R_Squared_L = sapply(lm_L, function(model) summary(model)$r.squared), - R_Squared_K = sapply(lm_K, function(model) summary(model)$r.squared), - R_Squared_r = sapply(lm_r, function(model) summary(model)$r.squared), - R_Squared_AUC = sapply(lm_AUC, function(model) summary(model)$r.squared), + lm_Score_L = sapply(lm_L, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA), + lm_Score_K = sapply(lm_K, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA), + lm_Score_r = sapply(lm_r, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA), + lm_Score_AUC = sapply(lm_AUC, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA), + R_Squared_L = sapply(lm_L, function(model) if (!is.null(model)) summary(model)$r.squared else NA), + R_Squared_K = sapply(lm_K, function(model) if (!is.null(model)) summary(model)$r.squared else NA), + R_Squared_r = sapply(lm_r, function(model) if (!is.null(model)) summary(model)$r.squared else NA), + R_Squared_AUC = sapply(lm_AUC, function(model) if (!is.null(model)) summary(model)$r.squared else NA), Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE), Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE), Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE), Sum_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE) ) + # Calculate Z-scores stats <- stats %>% mutate( Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs, @@ -315,9 +305,10 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c Z_lm_AUC = (lm_Score_AUC - mean(lm_Score_AUC, na.rm = TRUE)) / sd(lm_Score_AUC, na.rm = TRUE) ) - # Declare column order for output + # Final output preparation calculations <- stats %>% - select("OrfRep", "Gene", "num", "conc_num", "conc_num_factor", + select( + "OrfRep", "Gene", "num", "conc_num", "conc_num_factor", "mean_L", "mean_K", "mean_r", "mean_AUC", "median_L", "median_K", "median_r", "median_AUC", "sd_L", "sd_K", "sd_r", "sd_AUC", @@ -332,10 +323,9 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c "NG", "SM", "DB") %>% ungroup() - # Also arrange results by Z_lm_L and NG interactions <- stats %>% - select("OrfRep", "Gene", "num", - "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_AUC", "Raw_Shift_r", + select( + "OrfRep", "Gene", "num", "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_AUC", "Raw_Shift_r", "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC", "lm_Score_L", "lm_Score_K", "lm_Score_AUC", "lm_Score_r", "R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC", @@ -350,6 +340,7 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c return(list(calculations = calculations, interactions = interactions)) } + generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_layout = NULL) { message("Generating html and pdf plots for: ", file_name, ".pdf|html")