Suppress some interaction score warnings

2024-09-12 22:34:24 -04:00
parent 54b32f47c7
commit 4b2157df5f
1 changed files with 54 additions and 21 deletions
--- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R
+++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R
@@ -275,25 +275,60 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
    )
  # Create linear models with error handling for missing/insufficient data
  # This part is a PITA so best to contain it in its own function
  calculate_lm_values <- function(y, x) {
    if (length(unique(x)) > 1 && sum(!is.na(y)) > 1) {
      # Suppress warnings only for perfect fits or similar issues
      model <- suppressWarnings(lm(y ~ x))
      coefficients <- coef(model)
      r_squared <- tryCatch({
        summary(model)$r.squared
      }, warning = function(w) {
        NA  # Set r-squared to NA if there's a warning
      })
      return(list(intercept = coefficients[1], slope = coefficients[2], r_squared = r_squared))
    } else {
      return(list(intercept = NA, slope = NA, r_squared = NA))
    }
  }
  lms <- stats %>%
-    summarise(
+    group_by(across(all_of(group_vars))) %>%
-      lm_L = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_L)) > 1) list(lm(Delta_L ~ conc_num_factor)) else NA,
+    reframe(
-      lm_K = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_K)) > 1) list(lm(Delta_K ~ conc_num_factor)) else NA,
+      lm_L = list(calculate_lm_values(Delta_L, conc_num_factor)),
-      lm_r = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_r)) > 1) list(lm(Delta_r ~ conc_num_factor)) else NA,
+      lm_K = list(calculate_lm_values(Delta_K, conc_num_factor)),
-      lm_AUC = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_AUC)) > 1) list(lm(Delta_AUC ~ conc_num_factor)) else NA
+      lm_r = list(calculate_lm_values(Delta_r, conc_num_factor)),
      lm_AUC = list(calculate_lm_values(Delta_AUC, conc_num_factor))
    )
  lms <- lms %>%
    mutate(
      lm_L_intercept = sapply(lm_L, `[[`, "intercept"),
      lm_L_slope = sapply(lm_L, `[[`, "slope"),
      lm_L_r_squared = sapply(lm_L, `[[`, "r_squared"),
      lm_K_intercept = sapply(lm_K, `[[`, "intercept"),
      lm_K_slope = sapply(lm_K, `[[`, "slope"),
      lm_K_r_squared = sapply(lm_K, `[[`, "r_squared"),
      lm_r_intercept = sapply(lm_r, `[[`, "intercept"),
      lm_r_slope = sapply(lm_r, `[[`, "slope"),
      lm_r_r_squared = sapply(lm_r, `[[`, "r_squared"),
      lm_AUC_intercept = sapply(lm_AUC, `[[`, "intercept"),
      lm_AUC_slope = sapply(lm_AUC, `[[`, "slope"),
      lm_AUC_r_squared = sapply(lm_AUC, `[[`, "r_squared")
    ) %>%
    select(-lm_L, -lm_K, -lm_r, -lm_AUC)
  stats <- stats %>%
    left_join(lms, by = group_vars) %>%
    mutate(
-      lm_Score_L = lapply(lm_L, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
+      lm_Score_L = lm_L_slope * max_conc + lm_L_intercept,
-      lm_Score_K = lapply(lm_K, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
+      lm_Score_K = lm_K_slope * max_conc + lm_K_intercept,
-      lm_Score_r = lapply(lm_r, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
+      lm_Score_r = lm_r_slope * max_conc + lm_r_intercept,
-      lm_Score_AUC = lapply(lm_AUC, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA),
+      lm_Score_AUC = lm_AUC_slope * max_conc + lm_AUC_intercept,
-      R_Squared_L = lapply(lm_L, function(model) if (!is.null(model)) summary(model)$r.squared else NA),
+      R_Squared_L = lm_L_r_squared,
-      R_Squared_K = lapply(lm_K, function(model) if (!is.null(model)) summary(model)$r.squared else NA),
+      R_Squared_K = lm_K_r_squared,
-      R_Squared_r = lapply(lm_r, function(model) if (!is.null(model)) summary(model)$r.squared else NA),
+      R_Squared_r = lm_r_r_squared,
-      R_Squared_AUC = lapply(lm_AUC, function(model) if (!is.null(model)) summary(model)$r.squared else NA),
+      R_Squared_AUC = lm_AUC_r_squared,
      Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE),
      Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE),
      Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE),
@@ -302,10 +337,6 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
  stats <- stats %>%
    mutate(
      lm_Score_L = unlist(lm_Score_L),
      lm_Score_K = unlist(lm_Score_K),
      lm_Score_r = unlist(lm_Score_r),
      lm_Score_AUC = unlist(lm_Score_AUC),
      Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs,
      Avg_Zscore_K = Sum_Zscore_K / num_non_removed_concs,
      Avg_Zscore_r = Sum_Zscore_r / (total_conc_num - 1),
@@ -356,8 +387,8 @@ generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_la
  plots <- lapply(plot_configs, function(config) {
    df <- config$df
-    print(df %>% select(any_of(c("OrfRep", "Plate", "scan", "Col", "Row", "num", "OrfRep", "conc_num", "conc_num_factor",
+    # print(df %>% select(any_of(c("OrfRep", "Plate", "scan", "Col", "Row", "num", "OrfRep", "conc_num", "conc_num_factor",
-      "delta_bg_tolerance", "delta_bg", "Gene", "L", "K", "r", "AUC", "NG", "DB"))), n = 100)
+    #   "delta_bg_tolerance", "delta_bg", "Gene", "L", "K", "r", "AUC", "NG", "DB"))), n = 5)
    # Define aes mapping based on the presence of y_var
    aes_mapping <- if (is.null(config$y_var)) {
@@ -785,7 +816,7 @@ main <- function() {
      file = file.path(out_dir, "max_observed_L_vals_for_spots_outside_2sd_K.csv"), row.names = FALSE)
    # Each plots list corresponds to a file
-    message("Generating QC plot configurations")
+    message("Generating quality control plot configurations")
    l_vs_k_plots <- list(
      list(
        df = df,
@@ -947,7 +978,7 @@ main <- function() {
      )
    )
-    message("Generating QC plots")
+    message("Generating quality control plots")
    generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots)
    generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots)
    generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots)
@@ -1046,8 +1077,10 @@ main <- function() {
      write.csv(zscores_interactions, file = file.path(out_dir, "ZScores_Interaction.csv"), row.names = FALSE)
      # Create interaction plots
      message("Generating interaction plot configurations")
      reference_plot_configs <- generate_interaction_plot_configs(df_reference, interaction_vars)
      deletion_plot_configs <- generate_interaction_plot_configs(df_deletion, interaction_vars)
      message("Generating interaction plots")
      generate_and_save_plots(out_dir, "RF_interactionPlots", reference_plot_configs, grid_layout = list(ncol = 4, nrow = 3))
      generate_and_save_plots(out_dir, "InteractionPlots", deletion_plot_configs, grid_layout = list(ncol = 4, nrow = 3))