vor 9 Monaten · 4b2157df5f
--- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R
+++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R
@@ -275,25 +275,60 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
 
				     )
			
 
				 
			
 
				   # Create linear models with error handling for missing/insufficient data
			
 
				+  # This part is a PITA so best to contain it in its own function
			
 
				+  calculate_lm_values <- function(y, x) {
			
 
				+    if (length(unique(x)) > 1 && sum(!is.na(y)) > 1) {
			
 
				+      # Suppress warnings only for perfect fits or similar issues
			
 
				+      model <- suppressWarnings(lm(y ~ x))
			
 
				+      coefficients <- coef(model)
			
 
				+      r_squared <- tryCatch({
			
 
				+        summary(model)$r.squared
			
 
				+      }, warning = function(w) {
			
 
				+        NA  # Set r-squared to NA if there's a warning
			
 
				+      })
			
 
				+      return(list(intercept = coefficients[1], slope = coefficients[2], r_squared = r_squared))
			
 
				+    } else {
			
 
				+      return(list(intercept = NA, slope = NA, r_squared = NA))
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				   lms <- stats %>%
			
 
				-    summarise(
			
 
				-      lm_L = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_L)) > 1) list(lm(Delta_L ~ conc_num_factor)) else NA,
			
 
				-      lm_K = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_K)) > 1) list(lm(Delta_K ~ conc_num_factor)) else NA,
			
 
				-      lm_r = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_r)) > 1) list(lm(Delta_r ~ conc_num_factor)) else NA,
			
 
				-      lm_AUC = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_AUC)) > 1) list(lm(Delta_AUC ~ conc_num_factor)) else NA
			
 
				+    group_by(across(all_of(group_vars))) %>%
			
 
				+    reframe(
			
 
				+      lm_L = list(calculate_lm_values(Delta_L, conc_num_factor)),
			
 
				+      lm_K = list(calculate_lm_values(Delta_K, conc_num_factor)),
			
 
				+      lm_r = list(calculate_lm_values(Delta_r, conc_num_factor)),
			
 
				+      lm_AUC = list(calculate_lm_values(Delta_AUC, conc_num_factor))
			
 
				     )
			
 
				 
			
 
				+  lms <- lms %>%
			
 
				+    mutate(
			
 
				+      lm_L_intercept = sapply(lm_L, `[[`, "intercept"),
			
 
				+      lm_L_slope = sapply(lm_L, `[[`, "slope"),
			
 
				+      lm_L_r_squared = sapply(lm_L, `[[`, "r_squared"),
			
 
				+      lm_K_intercept = sapply(lm_K, `[[`, "intercept"),
			
 
				+      lm_K_slope = sapply(lm_K, `[[`, "slope"),
			
 
				+      lm_K_r_squared = sapply(lm_K, `[[`, "r_squared"),
			
 
				+      lm_r_intercept = sapply(lm_r, `[[`, "intercept"),
			
 
				+      lm_r_slope = sapply(lm_r, `[[`, "slope"),
			
 
				+      lm_r_r_squared = sapply(lm_r, `[[`, "r_squared"),
			
 
				+      lm_AUC_intercept = sapply(lm_AUC, `[[`, "intercept"),
			
 
				+      lm_AUC_slope = sapply(lm_AUC, `[[`, "slope"),
			
 
				+      lm_AUC_r_squared = sapply(lm_AUC, `[[`, "r_squared")
			
 
				+    ) %>%
			
 
				+    select(-lm_L, -lm_K, -lm_r, -lm_AUC)
			
 
				+
			
 
				   stats <- stats %>%
			
 
				     left_join(lms, by = group_vars) %>%
			
 
				     mutate(
			
 
				-      lm_Score_L = lapply(lm_L, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
			
 
				-      lm_Score_K = lapply(lm_K, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
			
 
				-      lm_Score_r = lapply(lm_r, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
			
 
				-      lm_Score_AUC = lapply(lm_AUC, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
			
 
				-      R_Squared_L = lapply(lm_L, function(model) if (!is.null(model)) summary(model)$r.squared else NA),
			
 
				-      R_Squared_K = lapply(lm_K, function(model) if (!is.null(model)) summary(model)$r.squared else NA),
			
 
				-      R_Squared_r = lapply(lm_r, function(model) if (!is.null(model)) summary(model)$r.squared else NA),
			
 
				-      R_Squared_AUC = lapply(lm_AUC, function(model) if (!is.null(model)) summary(model)$r.squared else NA),
			
 
				+      lm_Score_L = lm_L_slope * max_conc + lm_L_intercept,
			
 
				+      lm_Score_K = lm_K_slope * max_conc + lm_K_intercept,
			
 
				+      lm_Score_r = lm_r_slope * max_conc + lm_r_intercept,
			
 
				+      lm_Score_AUC = lm_AUC_slope * max_conc + lm_AUC_intercept,
			
 
				+      R_Squared_L = lm_L_r_squared,
			
 
				+      R_Squared_K = lm_K_r_squared,
			
 
				+      R_Squared_r = lm_r_r_squared,
			
 
				+      R_Squared_AUC = lm_AUC_r_squared,
			
 
				       Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE),
			
 
				       Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE),
			
 
				       Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE),
			
@@ -302,10 +337,6 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
 
				 
			
 
				   stats <- stats %>%
			
 
				     mutate(
			
 
				-      lm_Score_L = unlist(lm_Score_L),
			
 
				-      lm_Score_K = unlist(lm_Score_K),
			
 
				-      lm_Score_r = unlist(lm_Score_r),
			
 
				-      lm_Score_AUC = unlist(lm_Score_AUC),
			
 
				       Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs,
			
 
				       Avg_Zscore_K = Sum_Zscore_K / num_non_removed_concs,
			
 
				       Avg_Zscore_r = Sum_Zscore_r / (total_conc_num - 1),
			
@@ -356,8 +387,8 @@ generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_la
 
				   plots <- lapply(plot_configs, function(config) {
			
 
				     df <- config$df
			
 
				 
			
 
				-    print(df %>% select(any_of(c("OrfRep", "Plate", "scan", "Col", "Row", "num", "OrfRep", "conc_num", "conc_num_factor",
			
 
				-      "delta_bg_tolerance", "delta_bg", "Gene", "L", "K", "r", "AUC", "NG", "DB"))), n = 100)
			
 
				+    # print(df %>% select(any_of(c("OrfRep", "Plate", "scan", "Col", "Row", "num", "OrfRep", "conc_num", "conc_num_factor",
			
 
				+    #   "delta_bg_tolerance", "delta_bg", "Gene", "L", "K", "r", "AUC", "NG", "DB"))), n = 5)
			
 
				 
			
 
				     # Define aes mapping based on the presence of y_var
			
 
				     aes_mapping <- if (is.null(config$y_var)) {
			
@@ -785,7 +816,7 @@ main <- function() {
 
				       file = file.path(out_dir, "max_observed_L_vals_for_spots_outside_2sd_K.csv"), row.names = FALSE)
			
 
				 
			
 
				     # Each plots list corresponds to a file
			
 
				-    message("Generating QC plot configurations")
			
 
				+    message("Generating quality control plot configurations")
			
 
				     l_vs_k_plots <- list(
			
 
				       list(
			
 
				         df = df,
			
@@ -947,7 +978,7 @@ main <- function() {
 
				       )
			
 
				     )
			
 
				 
			
 
				-    message("Generating QC plots")
			
 
				+    message("Generating quality control plots")
			
 
				     generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots)
			
 
				     generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots)
			
 
				     generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots)
			
@@ -1046,8 +1077,10 @@ main <- function() {
 
				       write.csv(zscores_interactions, file = file.path(out_dir, "ZScores_Interaction.csv"), row.names = FALSE)
			
 
				 
			
 
				       # Create interaction plots
			
 
				+      message("Generating interaction plot configurations")
			
 
				       reference_plot_configs <- generate_interaction_plot_configs(df_reference, interaction_vars)
			
 
				       deletion_plot_configs <- generate_interaction_plot_configs(df_deletion, interaction_vars)
			
 
				+      message("Generating interaction plots")
			
 
				       generate_and_save_plots(out_dir, "RF_interactionPlots", reference_plot_configs, grid_layout = list(ncol = 4, nrow = 3))
			
 
				       generate_and_save_plots(out_dir, "InteractionPlots", deletion_plot_configs, grid_layout = list(ncol = 4, nrow = 3))