From 57ff7698a323240da518d81a34d60e9f176de924 Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Wed, 25 Sep 2024 02:28:46 -0400 Subject: [PATCH] Fix Z_lm calculations --- .../apps/r/calculate_interaction_zscores.R | 35 +++++++++++++------ 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 6ce22a06..7e919fe5 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -191,14 +191,14 @@ calculate_summary_stats <- function(df, variables, group_vars) { return(list(summary_stats = summary_stats, df_with_stats = df_joined)) } -calculate_interaction_scores <- function(df, max_conc, bg_stats, variables = c("L", "K", "r", "AUC"), +calculate_interaction_scores <- function(df, max_conc, bg_stats, group_vars = c("OrfRep", "Gene", "num")) { # Calculate total concentration variables total_conc_num <- length(unique(df$conc_num)) calculations <- df %>% - group_by(OrfRep, Gene, num) %>% + group_by(across(all_of(group_vars))) %>% mutate( NG = sum(NG, na.rm = TRUE), DB = sum(DB, na.rm = TRUE), @@ -260,19 +260,34 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats, variables = c(" R_Squared_L = map_dbl(gene_lm_L, ~ summary(.x)$r.squared), R_Squared_K = map_dbl(gene_lm_K, ~ summary(.x)$r.squared), R_Squared_r = map_dbl(gene_lm_r, ~ summary(.x)$r.squared), - R_Squared_AUC = map_dbl(gene_lm_AUC, ~ summary(.x)$r.squared), - - # Calculate Z_lm_* Scores - Z_lm_L = (lm_Score_L - mean(lm_Score_L, na.rm = TRUE)) / sd(lm_Score_L, na.rm = TRUE), - Z_lm_K = (lm_Score_K - mean(lm_Score_K, na.rm = TRUE)) / sd(lm_Score_K, na.rm = TRUE), - Z_lm_r = (lm_Score_r - mean(lm_Score_r, na.rm = TRUE)) / sd(lm_Score_r, na.rm = TRUE), - Z_lm_AUC = (lm_Score_AUC - mean(lm_Score_AUC, na.rm = TRUE)) / sd(lm_Score_AUC, na.rm = TRUE) + R_Squared_AUC = map_dbl(gene_lm_AUC, ~ summary(.x)$r.squared) ) %>% ungroup() + # Calculate overall mean and SD for lm_Score_* variables + lm_means_sds <- calculations %>% + summarise( + lm_mean_L = mean(lm_Score_L, na.rm = TRUE), + lm_sd_L = sd(lm_Score_L, na.rm = TRUE), + lm_mean_K = mean(lm_Score_K, na.rm = TRUE), + lm_sd_K = sd(lm_Score_K, na.rm = TRUE), + lm_mean_r = mean(lm_Score_r, na.rm = TRUE), + lm_sd_r = sd(lm_Score_r, na.rm = TRUE), + lm_mean_AUC = mean(lm_Score_AUC, na.rm = TRUE), + lm_sd_AUC = sd(lm_Score_AUC, na.rm = TRUE) + ) + + calculations <- calculations %>% + mutate( + Z_lm_L = (lm_Score_L - lm_means_sds$lm_mean_L) / lm_means_sds$lm_sd_L, + Z_lm_K = (lm_Score_K - lm_means_sds$lm_mean_K) / lm_means_sds$lm_sd_K, + Z_lm_r = (lm_Score_r - lm_means_sds$lm_mean_r) / lm_means_sds$lm_sd_r, + Z_lm_AUC = (lm_Score_AUC - lm_means_sds$lm_mean_AUC) / lm_means_sds$lm_sd_AUC + ) + # Summarize some of the stats interactions <- calculations %>% - group_by(OrfRep, Gene, num) %>% + group_by(across(all_of(group_vars))) %>% mutate( # Calculate raw shifts Raw_Shift_L = first(Raw_Shift_L),