diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 4a77f4c4..69401a59 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -277,23 +277,23 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c # Create linear models with error handling for missing/insufficient data lms <- stats %>% summarise( - lm_L = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_L)) > 1) list(lm(Delta_L ~ conc_num_factor)) else NULL, - lm_K = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_K)) > 1) list(lm(Delta_K ~ conc_num_factor)) else NULL, - lm_r = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_r)) > 1) list(lm(Delta_r ~ conc_num_factor)) else NULL, - lm_AUC = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_AUC)) > 1) list(lm(Delta_AUC ~ conc_num_factor)) else NULL + lm_L = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_L)) > 1) list(lm(Delta_L ~ conc_num_factor)) else NA, + lm_K = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_K)) > 1) list(lm(Delta_K ~ conc_num_factor)) else NA, + lm_r = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_r)) > 1) list(lm(Delta_r ~ conc_num_factor)) else NA, + lm_AUC = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_AUC)) > 1) list(lm(Delta_AUC ~ conc_num_factor)) else NA ) stats <- stats %>% left_join(lms, by = group_vars) %>% mutate( - lm_Score_L = sapply(lm_L, function(model) if (!is.na(model)) coef(model)[2] * max_conc + coef(model)[1] else NA), - lm_Score_K = sapply(lm_K, function(model) if (!is.na(model)) coef(model)[2] * max_conc + coef(model)[1] else NA), - lm_Score_r = sapply(lm_r, function(model) if (!is.na(model)) coef(model)[2] * max_conc + coef(model)[1] else NA), - lm_Score_AUC = sapply(lm_AUC, function(model) if (!is.na(model)) coef(model)[2] * max_conc + coef(model)[1] else NA), - R_Squared_L = sapply(lm_L, function(model) if (!is.na(model)) summary(model)$r.squared else NA), - R_Squared_K = sapply(lm_K, function(model) if (!is.na(model)) summary(model)$r.squared else NA), - R_Squared_r = sapply(lm_r, function(model) if (!is.na(model)) summary(model)$r.squared else NA), - R_Squared_AUC = sapply(lm_AUC, function(model) if (!is.na(model)) summary(model)$r.squared else NA), + lm_Score_L = lapply(lm_L, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA), + lm_Score_K = lapply(lm_K, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA), + lm_Score_r = lapply(lm_r, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA), + lm_Score_AUC = lapply(lm_AUC, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA), + R_Squared_L = lapply(lm_L, function(model) if (!is.null(model)) summary(model)$r.squared else NA), + R_Squared_K = lapply(lm_K, function(model) if (!is.null(model)) summary(model)$r.squared else NA), + R_Squared_r = lapply(lm_r, function(model) if (!is.null(model)) summary(model)$r.squared else NA), + R_Squared_AUC = lapply(lm_AUC, function(model) if (!is.null(model)) summary(model)$r.squared else NA), Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE), Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE), Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE), @@ -302,6 +302,10 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c stats <- stats %>% mutate( + lm_Score_L = unlist(lm_Score_L), + lm_Score_K = unlist(lm_Score_K), + lm_Score_r = unlist(lm_Score_r), + lm_Score_AUC = unlist(lm_Score_AUC), Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs, Avg_Zscore_K = Sum_Zscore_K / num_non_removed_concs, Avg_Zscore_r = Sum_Zscore_r / (total_conc_num - 1), @@ -693,6 +697,7 @@ main <- function() { summary_vars <- c("L", "K", "r", "AUC", "delta_bg") # fields to filter and calculate summary stats across group_vars <- c("OrfRep", "conc_num", "conc_num_factor") # default fields to group by + orf_group_vars <- c("OrfRep", "Gene", "num") print_vars <- c("OrfRep", "Plate", "scan", "Col", "Row", "num", "OrfRep", "conc_num", "conc_num_factor", "delta_bg_tolerance", "delta_bg", "Gene", "L", "K", "r", "AUC", "NG", "DB") @@ -1024,10 +1029,10 @@ main <- function() { message("Calculating interaction scores") # print("Reference strain:") # print(head(reference_strain)) - reference_results <- calculate_interaction_scores(reference_strain, max_conc, interaction_vars) + reference_results <- calculate_interaction_scores(reference_strain, max_conc, interaction_vars, group_vars = orf_group_vars) # print("Deletion strains:") # print(head(deletion_strains)) - deletion_results <- calculate_interaction_scores(deletion_strains, max_conc, interaction_vars) + deletion_results <- calculate_interaction_scores(deletion_strains, max_conc, interaction_vars, group_vars = orf_group_vars) zscores_calculations_reference <- reference_results$calculations zscores_interactions_reference <- reference_results$interactions @@ -1114,11 +1119,20 @@ main <- function() { # Formerly X_NArm zscores_interactions_filtered <- zscores_interactions %>% - group_by(across(all_of(group_vars))) %>% + group_by(across(all_of(orf_group_vars))) %>% filter(!is.na(Z_lm_L) | !is.na(Avg_Zscore_L)) # Final filtered correlation calculations and plots + lm_results <- zscores_interactions_filtered %>% + summarise( + lm_R_squared_L = if (n() > 1) summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared else NA, + lm_R_squared_K = if (n() > 1) summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared else NA, + lm_R_squared_r = if (n() > 1) summary(lm(Z_lm_r ~ Avg_Zscore_r))$r.squared else NA, + lm_R_squared_AUC = if (n() > 1) summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared else NA + ) + zscores_interactions_filtered <- zscores_interactions_filtered %>% + left_join(lm_results, by = orf_group_vars) %>% mutate( Overlap = case_when( Z_lm_L >= 2 & Avg_Zscore_L >= 2 ~ "Deletion Enhancer Both", @@ -1128,14 +1142,11 @@ main <- function() { Z_lm_L >= 2 & Avg_Zscore_L <= -2 ~ "Deletion Enhancer lm, Deletion Suppressor Avg Z score", Z_lm_L <= -2 & Avg_Zscore_L >= 2 ~ "Deletion Suppressor lm, Deletion Enhancer Avg Z score", TRUE ~ "No Effect" - ), - lm_R_squared_L = summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared, - lm_R_squared_K = summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared, - lm_R_squared_r = summary(lm(Z_lm_r ~ Avg_Zscore_r))$r.squared, - lm_R_squared_AUC = summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared + ) ) %>% ungroup() + rank_plot_configs <- c( generate_rank_plot_configs(zscores_interactions_filtered, "Rank_L", "Avg_Zscore_L", "L"), generate_rank_plot_configs(zscores_interactions_filtered, "Rank_K", "Avg_Zscore_K", "K")