From 30abced9ffc6615f196afa186090b6db4e4f4e8a Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Thu, 12 Sep 2024 19:56:03 -0400 Subject: [PATCH] Return NULL from lm instead of refactor --- .../apps/r/calculate_interaction_zscores.R | 96 ++++++++++--------- 1 file changed, 50 insertions(+), 46 deletions(-) diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 39a2bf84..4a77f4c4 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -204,7 +204,6 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c AUC = df %>% filter(conc_num_factor == 0) %>% pull(sd_AUC) %>% first() ) - # Main statistics and shifts stats <- df %>% mutate( WT_L = mean_L, @@ -236,14 +235,14 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c stats <- stats %>% group_by(across(all_of(group_vars))) %>% mutate( - Raw_Shift_L = mean_L - bg_means$L, - Raw_Shift_K = mean_K - bg_means$K, - Raw_Shift_r = mean_r - bg_means$r, - Raw_Shift_AUC = mean_AUC - bg_means$AUC, - Z_Shift_L = Raw_Shift_L / bg_sd$L, - Z_Shift_K = Raw_Shift_K / bg_sd$K, - Z_Shift_r = Raw_Shift_r / bg_sd$r, - Z_Shift_AUC = Raw_Shift_AUC / bg_sd$AUC + Raw_Shift_L = mean_L[[1]] - bg_means$L, + Raw_Shift_K = mean_K[[1]] - bg_means$K, + Raw_Shift_r = mean_r[[1]] - bg_means$r, + Raw_Shift_AUC = mean_AUC[[1]] - bg_means$AUC, + Z_Shift_L = Raw_Shift_L[[1]] / bg_sd$L, + Z_Shift_K = Raw_Shift_K[[1]] / bg_sd$K, + Z_Shift_r = Raw_Shift_r[[1]] / bg_sd$r, + Z_Shift_AUC = Raw_Shift_AUC[[1]] / bg_sd$AUC ) stats <- stats %>% @@ -256,7 +255,9 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c Delta_K = mean_K - Exp_K, Delta_r = mean_r - Exp_r, Delta_AUC = mean_AUC - Exp_AUC - ) %>% + ) + + stats <- stats %>% mutate( Delta_L = if_else(NG == 1, mean_L - WT_L, Delta_L), Delta_K = if_else(NG == 1, mean_K - WT_K, Delta_K), @@ -265,7 +266,15 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L) ) - # Create linear models with proper error handling for insufficient data + stats <- stats %>% + mutate( + Zscore_L = Delta_L / WT_sd_L, + Zscore_K = Delta_K / WT_sd_K, + Zscore_r = Delta_r / WT_sd_r, + Zscore_AUC = Delta_AUC / WT_sd_AUC + ) + + # Create linear models with error handling for missing/insufficient data lms <- stats %>% summarise( lm_L = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_L)) > 1) list(lm(Delta_L ~ conc_num_factor)) else NULL, @@ -274,25 +283,23 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c lm_AUC = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_AUC)) > 1) list(lm(Delta_AUC ~ conc_num_factor)) else NULL ) - # Join models and calculate interaction scores stats <- stats %>% left_join(lms, by = group_vars) %>% mutate( - lm_Score_L = sapply(lm_L, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA), - lm_Score_K = sapply(lm_K, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA), - lm_Score_r = sapply(lm_r, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA), - lm_Score_AUC = sapply(lm_AUC, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA), - R_Squared_L = sapply(lm_L, function(model) if (!is.null(model)) summary(model)$r.squared else NA), - R_Squared_K = sapply(lm_K, function(model) if (!is.null(model)) summary(model)$r.squared else NA), - R_Squared_r = sapply(lm_r, function(model) if (!is.null(model)) summary(model)$r.squared else NA), - R_Squared_AUC = sapply(lm_AUC, function(model) if (!is.null(model)) summary(model)$r.squared else NA), + lm_Score_L = sapply(lm_L, function(model) if (!is.na(model)) coef(model)[2] * max_conc + coef(model)[1] else NA), + lm_Score_K = sapply(lm_K, function(model) if (!is.na(model)) coef(model)[2] * max_conc + coef(model)[1] else NA), + lm_Score_r = sapply(lm_r, function(model) if (!is.na(model)) coef(model)[2] * max_conc + coef(model)[1] else NA), + lm_Score_AUC = sapply(lm_AUC, function(model) if (!is.na(model)) coef(model)[2] * max_conc + coef(model)[1] else NA), + R_Squared_L = sapply(lm_L, function(model) if (!is.na(model)) summary(model)$r.squared else NA), + R_Squared_K = sapply(lm_K, function(model) if (!is.na(model)) summary(model)$r.squared else NA), + R_Squared_r = sapply(lm_r, function(model) if (!is.na(model)) summary(model)$r.squared else NA), + R_Squared_AUC = sapply(lm_AUC, function(model) if (!is.na(model)) summary(model)$r.squared else NA), Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE), Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE), Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE), Sum_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE) ) - # Calculate Z-scores stats <- stats %>% mutate( Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs, @@ -305,34 +312,32 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c Z_lm_AUC = (lm_Score_AUC - mean(lm_Score_AUC, na.rm = TRUE)) / sd(lm_Score_AUC, na.rm = TRUE) ) - # Final output preparation + # Declare column order for output calculations <- stats %>% - select( - "OrfRep", "Gene", "num", "conc_num", "conc_num_factor", - "mean_L", "mean_K", "mean_r", "mean_AUC", - "median_L", "median_K", "median_r", "median_AUC", - "sd_L", "sd_K", "sd_r", "sd_AUC", - "se_L", "se_K", "se_r", "se_AUC", - "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC", - "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC", - "WT_L", "WT_K", "WT_r", "WT_AUC", - "WT_sd_L", "WT_sd_K", "WT_sd_r", "WT_sd_AUC", - "Exp_L", "Exp_K", "Exp_r", "Exp_AUC", - "Delta_L", "Delta_K", "Delta_r", "Delta_AUC", - "Zscore_L", "Zscore_K", "Zscore_r", "Zscore_AUC", - "NG", "SM", "DB") %>% + select("OrfRep", "Gene", "num", "conc_num", "conc_num_factor", + "mean_L", "mean_K", "mean_r", "mean_AUC", + "median_L", "median_K", "median_r", "median_AUC", + "sd_L", "sd_K", "sd_r", "sd_AUC", + "se_L", "se_K", "se_r", "se_AUC", + "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC", + "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC", + "WT_L", "WT_K", "WT_r", "WT_AUC", + "WT_sd_L", "WT_sd_K", "WT_sd_r", "WT_sd_AUC", + "Exp_L", "Exp_K", "Exp_r", "Exp_AUC", + "Delta_L", "Delta_K", "Delta_r", "Delta_AUC", + "Zscore_L", "Zscore_K", "Zscore_r", "Zscore_AUC", + "NG", "SM", "DB") %>% ungroup() interactions <- stats %>% - select( - "OrfRep", "Gene", "num", "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_AUC", "Raw_Shift_r", - "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC", - "lm_Score_L", "lm_Score_K", "lm_Score_AUC", "lm_Score_r", - "R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC", - "Sum_Zscore_L", "Sum_Zscore_K", "Sum_Zscore_r", "Sum_Zscore_AUC", - "Avg_Zscore_L", "Avg_Zscore_K", "Avg_Zscore_r", "Avg_Zscore_AUC", - "Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC", - "NG", "SM", "DB") %>% + select("OrfRep", "Gene", "num", "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_AUC", "Raw_Shift_r", + "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC", + "lm_Score_L", "lm_Score_K", "lm_Score_AUC", "lm_Score_r", + "R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC", + "Sum_Zscore_L", "Sum_Zscore_K", "Sum_Zscore_r", "Sum_Zscore_AUC", + "Avg_Zscore_L", "Avg_Zscore_K", "Avg_Zscore_r", "Avg_Zscore_AUC", + "Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC", + "NG", "SM", "DB") %>% arrange(desc(lm_Score_L)) %>% arrange(desc(NG)) %>% ungroup() @@ -340,7 +345,6 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c return(list(calculations = calculations, interactions = interactions)) } - generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_layout = NULL) { message("Generating html and pdf plots for: ", file_name, ".pdf|html")