Quellcode durchsuchen

Suppress some interaction score warnings

Bryan Roessler vor 7 Monaten
Ursprung
Commit
4b2157df5f
1 geänderte Dateien mit 54 neuen und 21 gelöschten Zeilen
  1. 54 21
      qhtcp-workflow/apps/r/calculate_interaction_zscores.R

+ 54 - 21
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -275,25 +275,60 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
     )
 
   # Create linear models with error handling for missing/insufficient data
+  # This part is a PITA so best to contain it in its own function
+  calculate_lm_values <- function(y, x) {
+    if (length(unique(x)) > 1 && sum(!is.na(y)) > 1) {
+      # Suppress warnings only for perfect fits or similar issues
+      model <- suppressWarnings(lm(y ~ x))
+      coefficients <- coef(model)
+      r_squared <- tryCatch({
+        summary(model)$r.squared
+      }, warning = function(w) {
+        NA  # Set r-squared to NA if there's a warning
+      })
+      return(list(intercept = coefficients[1], slope = coefficients[2], r_squared = r_squared))
+    } else {
+      return(list(intercept = NA, slope = NA, r_squared = NA))
+    }
+  }
+
   lms <- stats %>%
-    summarise(
-      lm_L = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_L)) > 1) list(lm(Delta_L ~ conc_num_factor)) else NA,
-      lm_K = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_K)) > 1) list(lm(Delta_K ~ conc_num_factor)) else NA,
-      lm_r = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_r)) > 1) list(lm(Delta_r ~ conc_num_factor)) else NA,
-      lm_AUC = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_AUC)) > 1) list(lm(Delta_AUC ~ conc_num_factor)) else NA
+    group_by(across(all_of(group_vars))) %>%
+    reframe(
+      lm_L = list(calculate_lm_values(Delta_L, conc_num_factor)),
+      lm_K = list(calculate_lm_values(Delta_K, conc_num_factor)),
+      lm_r = list(calculate_lm_values(Delta_r, conc_num_factor)),
+      lm_AUC = list(calculate_lm_values(Delta_AUC, conc_num_factor))
     )
 
+  lms <- lms %>%
+    mutate(
+      lm_L_intercept = sapply(lm_L, `[[`, "intercept"),
+      lm_L_slope = sapply(lm_L, `[[`, "slope"),
+      lm_L_r_squared = sapply(lm_L, `[[`, "r_squared"),
+      lm_K_intercept = sapply(lm_K, `[[`, "intercept"),
+      lm_K_slope = sapply(lm_K, `[[`, "slope"),
+      lm_K_r_squared = sapply(lm_K, `[[`, "r_squared"),
+      lm_r_intercept = sapply(lm_r, `[[`, "intercept"),
+      lm_r_slope = sapply(lm_r, `[[`, "slope"),
+      lm_r_r_squared = sapply(lm_r, `[[`, "r_squared"),
+      lm_AUC_intercept = sapply(lm_AUC, `[[`, "intercept"),
+      lm_AUC_slope = sapply(lm_AUC, `[[`, "slope"),
+      lm_AUC_r_squared = sapply(lm_AUC, `[[`, "r_squared")
+    ) %>%
+    select(-lm_L, -lm_K, -lm_r, -lm_AUC)
+
   stats <- stats %>%
     left_join(lms, by = group_vars) %>%
     mutate(
-      lm_Score_L = lapply(lm_L, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
-      lm_Score_K = lapply(lm_K, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
-      lm_Score_r = lapply(lm_r, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
-      lm_Score_AUC = lapply(lm_AUC, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
-      R_Squared_L = lapply(lm_L, function(model) if (!is.null(model)) summary(model)$r.squared else NA),
-      R_Squared_K = lapply(lm_K, function(model) if (!is.null(model)) summary(model)$r.squared else NA),
-      R_Squared_r = lapply(lm_r, function(model) if (!is.null(model)) summary(model)$r.squared else NA),
-      R_Squared_AUC = lapply(lm_AUC, function(model) if (!is.null(model)) summary(model)$r.squared else NA),
+      lm_Score_L = lm_L_slope * max_conc + lm_L_intercept,
+      lm_Score_K = lm_K_slope * max_conc + lm_K_intercept,
+      lm_Score_r = lm_r_slope * max_conc + lm_r_intercept,
+      lm_Score_AUC = lm_AUC_slope * max_conc + lm_AUC_intercept,
+      R_Squared_L = lm_L_r_squared,
+      R_Squared_K = lm_K_r_squared,
+      R_Squared_r = lm_r_r_squared,
+      R_Squared_AUC = lm_AUC_r_squared,
       Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE),
       Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE),
       Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE),
@@ -302,10 +337,6 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
 
   stats <- stats %>%
     mutate(
-      lm_Score_L = unlist(lm_Score_L),
-      lm_Score_K = unlist(lm_Score_K),
-      lm_Score_r = unlist(lm_Score_r),
-      lm_Score_AUC = unlist(lm_Score_AUC),
       Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs,
       Avg_Zscore_K = Sum_Zscore_K / num_non_removed_concs,
       Avg_Zscore_r = Sum_Zscore_r / (total_conc_num - 1),
@@ -356,8 +387,8 @@ generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_la
   plots <- lapply(plot_configs, function(config) {
     df <- config$df
 
-    print(df %>% select(any_of(c("OrfRep", "Plate", "scan", "Col", "Row", "num", "OrfRep", "conc_num", "conc_num_factor",
-      "delta_bg_tolerance", "delta_bg", "Gene", "L", "K", "r", "AUC", "NG", "DB"))), n = 100)
+    # print(df %>% select(any_of(c("OrfRep", "Plate", "scan", "Col", "Row", "num", "OrfRep", "conc_num", "conc_num_factor",
+    #   "delta_bg_tolerance", "delta_bg", "Gene", "L", "K", "r", "AUC", "NG", "DB"))), n = 5)
 
     # Define aes mapping based on the presence of y_var
     aes_mapping <- if (is.null(config$y_var)) {
@@ -785,7 +816,7 @@ main <- function() {
       file = file.path(out_dir, "max_observed_L_vals_for_spots_outside_2sd_K.csv"), row.names = FALSE)
 
     # Each plots list corresponds to a file
-    message("Generating QC plot configurations")
+    message("Generating quality control plot configurations")
     l_vs_k_plots <- list(
       list(
         df = df,
@@ -947,7 +978,7 @@ main <- function() {
       )
     )
 
-    message("Generating QC plots")
+    message("Generating quality control plots")
     generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots)
     generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots)
     generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots)
@@ -1046,8 +1077,10 @@ main <- function() {
       write.csv(zscores_interactions, file = file.path(out_dir, "ZScores_Interaction.csv"), row.names = FALSE)
 
       # Create interaction plots
+      message("Generating interaction plot configurations")
       reference_plot_configs <- generate_interaction_plot_configs(df_reference, interaction_vars)
       deletion_plot_configs <- generate_interaction_plot_configs(df_deletion, interaction_vars)
+      message("Generating interaction plots")
       generate_and_save_plots(out_dir, "RF_interactionPlots", reference_plot_configs, grid_layout = list(ncol = 4, nrow = 3))
       generate_and_save_plots(out_dir, "InteractionPlots", deletion_plot_configs, grid_layout = list(ncol = 4, nrow = 3))