소스 검색

Revert map2(), what a disaster

Bryan Roessler 6 달 전
부모
커밋
40ed4133b4
1개의 변경된 파일103개의 추가작업 그리고 241개의 파일을 삭제
  1. 103 241
      qhtcp-workflow/apps/r/calculate_interaction_zscores.R

+ 103 - 241
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -217,6 +217,20 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
     group_vars <- c("OrfRep", "Gene", "Drug")
   }
 
+  perform_lm_simple <- function(x, y, max_conc) {
+    if (all(is.na(x)) || all(is.na(y)) || length(x[!is.na(x)]) == 0 || length(y[!is.na(y)]) == 0) {
+      return(list(intercept = NA, slope = NA, r_squared = NA, score = NA))
+    } else {
+      fit <- lm(y ~ x)
+      return(list(
+        intercept = coef(fit)[1],
+        slope = coef(fit)[2],
+        r_squared = summary(fit)$r.squared,
+        score = max_conc * coef(fit)[2] + coef(fit)[1]
+      ))
+    }
+  }
+
   # Calculate WT statistics from df_bg
   wt_stats <- df_bg %>%
     group_by(across(all_of(bg_group_vars))) %>%
@@ -301,78 +315,35 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
   calculations <- calculations %>%
     group_by(across(all_of(group_vars))) %>%
     mutate(
-      lm_L = map2(.x = Delta_L, .y = conc_num_factor, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
-        list(intercept = NA, slope = NA, r_squared = NA, score = NA)
-      } else {
-        lm(.x ~ .y) %>% {
-          list(
-            intercept = coef(.)[1],
-            slope = coef(.)[2],
-            r_squared = summary(.)$r.squared,
-            score = max_conc * coef(.)[2] + coef(.)[1]
-          )
-        }
-      }),
-      lm_K = map2(.x = Delta_K, .y = conc_num_factor, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
-        list(intercept = NA, slope = NA, r_squared = NA, score = NA)
-      } else {
-        lm(.x ~ .y) %>% {
-          list(
-            intercept = coef(.)[1],
-            slope = coef(.)[2],
-            r_squared = summary(.)$r.squared,
-            score = max_conc * coef(.)[2] + coef(.)[1]
-          )
-        }
-      }),
-      lm_r = map2(.x = Delta_r, .y = conc_num_factor, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
-        list(intercept = NA, slope = NA, r_squared = NA, score = NA)
-      } else {
-        lm(.x ~ .y) %>% {
-          list(
-            intercept = coef(.)[1],
-            slope = coef(.)[2],
-            r_squared = summary(.)$r.squared,
-            score = max_conc * coef(.)[2] + coef(.)[1]
-          )
-        }
-      }),
-      lm_AUC = map2(.x = Delta_AUC, .y = conc_num_factor, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
-        list(intercept = NA, slope = NA, r_squared = NA, score = NA)
-      } else {
-        lm(.x ~ .y) %>% {
-          list(
-            intercept = coef(.)[1],
-            slope = coef(.)[2],
-            r_squared = summary(.)$r.squared,
-            score = max_conc * coef(.)[2] + coef(.)[1]
-          )
-        }
-      }),
+      # Apply the simple LM function for each variable
+      lm_L = list(perform_lm_simple(Delta_L, conc_num_factor, max_conc)),
+      lm_K = list(perform_lm_simple(Delta_K, conc_num_factor, max_conc)),
+      lm_r = list(perform_lm_simple(Delta_r, conc_num_factor, max_conc)),
+      lm_AUC = list(perform_lm_simple(Delta_AUC, conc_num_factor, max_conc)),
 
       # Extract coefficients and statistics for each model
-      lm_intercept_L = map_dbl(lm_L, "intercept"),
-      lm_slope_L = map_dbl(lm_L, "slope"),
-      R_Squared_L = map_dbl(lm_L, "r_squared"),
-      lm_Score_L = map_dbl(lm_L, "score"),
-
-      lm_intercept_K = map_dbl(lm_K, "intercept"),
-      lm_slope_K = map_dbl(lm_K, "slope"),
-      R_Squared_K = map_dbl(lm_K, "r_squared"),
-      lm_Score_K = map_dbl(lm_K, "score"),
-
-      lm_intercept_r = map_dbl(lm_r, "intercept"),
-      lm_slope_r = map_dbl(lm_r, "slope"),
-      R_Squared_r = map_dbl(lm_r, "r_squared"),
-      lm_Score_r = map_dbl(lm_r, "score"),
-
-      lm_intercept_AUC = map_dbl(lm_AUC, "intercept"),
-      lm_slope_AUC = map_dbl(lm_AUC, "slope"),
-      R_Squared_AUC = map_dbl(lm_AUC, "r_squared"),
-      lm_Score_AUC = map_dbl(lm_AUC, "score")
-    ) %>%
-    select(-lm_L, -lm_K, -lm_r, -lm_AUC) %>%
-    ungroup()
+      lm_intercept_L = lm_L[[1]]$intercept,
+      lm_slope_L = lm_L[[1]]$slope,
+      R_Squared_L = lm_L[[1]]$r_squared,
+      lm_Score_L = lm_L[[1]]$score,
+
+      lm_intercept_K = lm_K[[1]]$intercept,
+      lm_slope_K = lm_K[[1]]$slope,
+      R_Squared_K = lm_K[[1]]$r_squared,
+      lm_Score_K = lm_K[[1]]$score,
+
+      lm_intercept_r = lm_r[[1]]$intercept,
+      lm_slope_r = lm_r[[1]]$slope,
+      R_Squared_r = lm_r[[1]]$r_squared,
+      lm_Score_r = lm_r[[1]]$score,
+
+      lm_intercept_AUC = lm_AUC[[1]]$intercept,
+      lm_slope_AUC = lm_AUC[[1]]$slope,
+      R_Squared_AUC = lm_AUC[[1]]$r_squared,
+      lm_Score_AUC = lm_AUC[[1]]$score
+      ) %>%
+      select(-lm_L, -lm_K, -lm_r, -lm_AUC) %>%
+      ungroup()
 
   # For interaction plot error bars
   delta_means_sds <- calculations %>%
@@ -509,179 +480,70 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
           Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Zscore",
           TRUE ~ "No Effect"
         ),
-        lm_L = map2(.x = Z_lm_L, .y = Avg_Zscore_L, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
-          list(intercept = NA, slope = NA, r_squared = NA, score = NA)
-        } else {
-          lm(.y ~ .x) %>% {
-            list(
-              intercept = coef(.)[1],
-              slope = coef(.)[2],
-              r_squared = summary(.)$r.squared,
-              score = max_conc * coef(.)[2] + coef(.)[1]
-            )
-          }
-        }),
-        lm_K = map2(.x = Z_lm_K, .y = Avg_Zscore_K, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
-          list(intercept = NA, slope = NA, r_squared = NA, score = NA)
-        } else {
-          lm(.y ~ .x) %>% {
-            list(
-              intercept = coef(.)[1],
-              slope = coef(.)[2],
-              r_squared = summary(.)$r.squared,
-              score = max_conc * coef(.)[2] + coef(.)[1]
-            )
-          }
-        }),
-        lm_r = map2(.x = Z_lm_r, .y = Avg_Zscore_r, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
-          list(intercept = NA, slope = NA, r_squared = NA, score = NA)
-        } else {
-          lm(.y ~ .x) %>% {
-            list(
-              intercept = coef(.)[1],
-              slope = coef(.)[2],
-              r_squared = summary(.)$r.squared,
-              score = max_conc * coef(.)[2] + coef(.)[1]
-            )
-          }
-        }),
-        lm_AUC = map2(.x = Z_lm_AUC, .y = Avg_Zscore_AUC, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
-          list(intercept = NA, slope = NA, r_squared = NA, score = NA)
-        } else {
-          lm(.y ~ .x) %>% {
-            list(
-              intercept = coef(.)[1],
-              slope = coef(.)[2],
-              r_squared = summary(.)$r.squared,
-              score = max_conc * coef(.)[2] + coef(.)[1]
-            )
-          }
-        }),
-
-        # For correlation plots
-        Z_lm_K_L = map2(.x = Z_lm_K, .y = Z_lm_L, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
-          list(intercept = NA, slope = NA, r_squared = NA, score = NA)
-        } else {
-          lm(.y ~ .x) %>% {
-            list(
-              intercept = coef(.)[1],
-              slope = coef(.)[2],
-              r_squared = summary(.)$r.squared,
-              score = max_conc * coef(.)[2] + coef(.)[1]
-            )
-          }
-        }),
-        Z_lm_r_L = map2(.x = Z_lm_r, .y = Z_lm_L, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
-          list(intercept = NA, slope = NA, r_squared = NA, score = NA)
-        } else {
-          lm(.y ~ .x) %>% {
-            list(
-              intercept = coef(.)[1],
-              slope = coef(.)[2],
-              r_squared = summary(.)$r.squared,
-              score = max_conc * coef(.)[2] + coef(.)[1]
-            )
-          }
-        }),
-        Z_lm_R_AUC_L = map2(.x = Z_lm_AUC, .y = Z_lm_L, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
-          list(intercept = NA, slope = NA, r_squared = NA, score = NA)
-        } else {
-          lm(.y ~ .x) %>% {
-            list(
-              intercept = coef(.)[1],
-              slope = coef(.)[2],
-              r_squared = summary(.)$r.squared,
-              score = max_conc * coef(.)[2] + coef(.)[1]
-            )
-          }
-        }),
-        Z_lm_R_r_K = map2(.x = Z_lm_r, .y = Z_lm_K, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
-          list(intercept = NA, slope = NA, r_squared = NA, score = NA)
-        } else {
-          lm(.y ~ .x) %>% {
-            list(
-              intercept = coef(.)[1],
-              slope = coef(.)[2],
-              r_squared = summary(.)$r.squared,
-              score = max_conc * coef(.)[2] + coef(.)[1]
-            )
-          }
-        }),
-        Z_lm_R_AUC_K = map2(.x = Z_lm_AUC, .y = Z_lm_K, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
-          list(intercept = NA, slope = NA, r_squared = NA, score = NA)
-        } else {
-          lm(.y ~ .x) %>% {
-            list(
-              intercept = coef(.)[1],
-              slope = coef(.)[2],
-              r_squared = summary(.)$r.squared,
-              score = max_conc * coef(.)[2] + coef(.)[1]
-            )
-          }
-        }),
-        Z_lm_R_AUC_r = map2(.x = Z_lm_AUC, .y = Z_lm_r, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) {
-          list(intercept = NA, slope = NA, r_squared = NA, score = NA)
-        } else {
-          lm(.y ~ .x) %>% {
-            list(
-              intercept = coef(.)[1],
-              slope = coef(.)[2],
-              r_squared = summary(.)$r.squared,
-              score = max_conc * coef(.)[2] + coef(.)[1]
-            )
-          }
-        }),
+        # Apply the perform_lm_simple function for each variable pair
+        lm_L = list(perform_lm_simple(Z_lm_L, Avg_Zscore_L, max_conc)),
+        lm_K = list(perform_lm_simple(Z_lm_K, Avg_Zscore_K, max_conc)),
+        lm_r = list(perform_lm_simple(Z_lm_r, Avg_Zscore_r, max_conc)),
+        lm_AUC = list(perform_lm_simple(Z_lm_AUC, Avg_Zscore_AUC, max_conc)),
+
+        # Correlation models for various pairs
+        Z_lm_K_L = list(perform_lm_simple(Z_lm_K, Z_lm_L, max_conc)),
+        Z_lm_r_L = list(perform_lm_simple(Z_lm_r, Z_lm_L, max_conc)),
+        Z_lm_R_AUC_L = list(perform_lm_simple(Z_lm_AUC, Z_lm_L, max_conc)),
+        Z_lm_R_r_K = list(perform_lm_simple(Z_lm_r, Z_lm_K, max_conc)),
+        Z_lm_R_AUC_K = list(perform_lm_simple(Z_lm_AUC, Z_lm_K, max_conc)),
+        Z_lm_R_AUC_r = list(perform_lm_simple(Z_lm_AUC, Z_lm_r, max_conc)),
 
         # Extract coefficients and statistics for each model
-        lm_rank_intercept_L = map_dbl(lm_L, "intercept"),
-        lm_rank_slope_L = map_dbl(lm_L, "slope"),
-        R_Squared_L = map_dbl(lm_L, "r_squared"),
-        lm_Score_L = map_dbl(lm_L, "score"),
-
-        lm_intercept_K = map_dbl(lm_K, "intercept"),
-        lm_slope_K = map_dbl(lm_K, "slope"),
-        R_Squared_K = map_dbl(lm_K, "r_squared"),
-        lm_Score_K = map_dbl(lm_K, "score"),
-
-        lm_intercept_r = map_dbl(lm_r, "intercept"),
-        lm_slope_r = map_dbl(lm_r, "slope"),
-        R_Squared_r = map_dbl(lm_r, "r_squared"),
-        lm_Score_r = map_dbl(lm_r, "score"),
-
-        lm_intercept_AUC = map_dbl(lm_AUC, "intercept"),
-        lm_slope_AUC = map_dbl(lm_AUC, "slope"),
-        R_Squared_AUC = map_dbl(lm_AUC, "r_squared"),
-        lm_Score_AUC = map_dbl(lm_AUC, "score"),
-
-        Z_lm_intercept_K_L = map_dbl(Z_lm_K_L, "intercept"),
-        Z_lm_slope_K_L = map_dbl(Z_lm_K_L, "slope"),
-        Z_lm_R_squared_K_L = map_dbl(Z_lm_K_L, "r_squared"),
-        Z_lm_Score_K_L = map_dbl(Z_lm_K_L, "score"),
-
-        Z_lm_intercept_r_L = map_dbl(Z_lm_r_L, "intercept"),
-        Z_lm_slope_r_L = map_dbl(Z_lm_r_L, "slope"),
-        Z_lm_R_squared_r_L = map_dbl(Z_lm_r_L, "r_squared"),
-        Z_lm_Score_r_L = map_dbl(Z_lm_r_L, "score"),
-
-        Z_lm_intercept_R_AUC_L = map_dbl(Z_lm_R_AUC_L, "intercept"),
-        Z_lm_slope_R_AUC_L = map_dbl(Z_lm_R_AUC_L, "slope"),
-        Z_lm_R_squared_R_AUC_L = map_dbl(Z_lm_R_AUC_L, "r_squared"),
-        Z_lm_Score_R_AUC_L = map_dbl(Z_lm_R_AUC_L, "score"),
-
-        Z_lm_intercept_R_r_K = map_dbl(Z_lm_R_r_K, "intercept"),
-        Z_lm_slope_R_r_K = map_dbl(Z_lm_R_r_K, "slope"),
-        Z_lm_R_squared_R_r_K = map_dbl(Z_lm_R_r_K, "r_squared"),
-        Z_lm_Score_R_r_K = map_dbl(Z_lm_R_r_K, "score"),
-
-        Z_lm_intercept_R_AUC_K = map_dbl(Z_lm_R_AUC_K, "intercept"),
-        Z_lm_slope_R_AUC_K = map_dbl(Z_lm_R_AUC_K, "slope"),
-        Z_lm_R_squared_R_AUC_K = map_dbl(Z_lm_R_AUC_K, "r_squared"),
-        Z_lm_Score_R_AUC_K = map_dbl(Z_lm_R_AUC_K, "score"),
-
-        Z_lm_intercept_R_AUC_r = map_dbl(Z_lm_R_AUC_r, "intercept"),
-        Z_lm_slope_R_AUC_r = map_dbl(Z_lm_R_AUC_r, "slope"),
-        Z_lm_R_squared_R_AUC_r = map_dbl(Z_lm_R_AUC_r, "r_squared"),
-        Z_lm_Score_R_AUC_r = map_dbl(Z_lm_R_AUC_r, "score")
+        lm_rank_intercept_L = lm_L[[1]]$intercept,
+        lm_rank_slope_L = lm_L[[1]]$slope,
+        R_Squared_L = lm_L[[1]]$r_squared,
+        lm_Score_L = lm_L[[1]]$score,
+
+        lm_intercept_K = lm_K[[1]]$intercept,
+        lm_slope_K = lm_K[[1]]$slope,
+        R_Squared_K = lm_K[[1]]$r_squared,
+        lm_Score_K = lm_K[[1]]$score,
+
+        lm_intercept_r = lm_r[[1]]$intercept,
+        lm_slope_r = lm_r[[1]]$slope,
+        R_Squared_r = lm_r[[1]]$r_squared,
+        lm_Score_r = lm_r[[1]]$score,
+
+        lm_intercept_AUC = lm_AUC[[1]]$intercept,
+        lm_slope_AUC = lm_AUC[[1]]$slope,
+        R_Squared_AUC = lm_AUC[[1]]$r_squared,
+        lm_Score_AUC = lm_AUC[[1]]$score,
+
+        Z_lm_intercept_K_L = Z_lm_K_L[[1]]$intercept,
+        Z_lm_slope_K_L = Z_lm_K_L[[1]]$slope,
+        Z_lm_R_squared_K_L = Z_lm_K_L[[1]]$r_squared,
+        Z_lm_Score_K_L = Z_lm_K_L[[1]]$score,
+
+        Z_lm_intercept_r_L = Z_lm_r_L[[1]]$intercept,
+        Z_lm_slope_r_L = Z_lm_r_L[[1]]$slope,
+        Z_lm_R_squared_r_L = Z_lm_r_L[[1]]$r_squared,
+        Z_lm_Score_r_L = Z_lm_r_L[[1]]$score,
+
+        Z_lm_intercept_R_AUC_L = Z_lm_R_AUC_L[[1]]$intercept,
+        Z_lm_slope_R_AUC_L = Z_lm_R_AUC_L[[1]]$slope,
+        Z_lm_R_squared_R_AUC_L = Z_lm_R_AUC_L[[1]]$r_squared,
+        Z_lm_Score_R_AUC_L = Z_lm_R_AUC_L[[1]]$score,
+
+        Z_lm_intercept_R_r_K = Z_lm_R_r_K[[1]]$intercept,
+        Z_lm_slope_R_r_K = Z_lm_R_r_K[[1]]$slope,
+        Z_lm_R_squared_R_r_K = Z_lm_R_r_K[[1]]$r_squared,
+        Z_lm_Score_R_r_K = Z_lm_R_r_K[[1]]$score,
+
+        Z_lm_intercept_R_AUC_K = Z_lm_R_AUC_K[[1]]$intercept,
+        Z_lm_slope_R_AUC_K = Z_lm_R_AUC_K[[1]]$slope,
+        Z_lm_R_squared_R_AUC_K = Z_lm_R_AUC_K[[1]]$r_squared,
+        Z_lm_Score_R_AUC_K = Z_lm_R_AUC_K[[1]]$score,
+
+        Z_lm_intercept_R_AUC_r = Z_lm_R_AUC_r[[1]]$intercept,
+        Z_lm_slope_R_AUC_r = Z_lm_R_AUC_r[[1]]$slope,
+        Z_lm_R_squared_R_AUC_r = Z_lm_R_AUC_r[[1]]$r_squared,
+        Z_lm_Score_R_AUC_r = Z_lm_R_AUC_r[[1]]$score
       ) %>%
       select(
         -lm_L, -lm_K, -lm_r, -lm_AUC, -Z_lm_K_L, -Z_lm_r_L, -Z_lm_R_AUC_L,