|
@@ -281,7 +281,7 @@ calculate_interaction_scores <- function(df, max_conc) {
|
|
|
|
|
|
interactions <- stats %>%
|
|
|
group_by(across(all_of(group_vars))) %>%
|
|
|
- summarise(
|
|
|
+ mutate(
|
|
|
OrfRep = first(OrfRep),
|
|
|
Gene = first(Gene),
|
|
|
num = first(num),
|
|
@@ -294,33 +294,47 @@ calculate_interaction_scores <- function(df, max_conc) {
|
|
|
Z_Shift_L = first(Z_Shift_L),
|
|
|
Z_Shift_K = first(Z_Shift_K),
|
|
|
Z_Shift_r = first(Z_Shift_r),
|
|
|
- Z_Shift_AUC = first(Z_Shift_AUC),
|
|
|
+ Z_Shift_AUC = first(Z_Shift_AUC)
|
|
|
+ )
|
|
|
+
|
|
|
+ # Summarise the data to calculate summary statistics
|
|
|
+ summary_stats <- interactions %>%
|
|
|
+ summarise(
|
|
|
Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE),
|
|
|
Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE),
|
|
|
Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE),
|
|
|
Sum_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE),
|
|
|
- lm_Score_L = max_conc * coef(lm_L)[2] + coef(lm_L)[1],
|
|
|
- lm_Score_K = max_conc * coef(lm_K)[2] + coef(lm_K)[1],
|
|
|
- lm_Score_r = max_conc * coef(lm_r)[2] + coef(lm_r)[1],
|
|
|
- lm_Score_AUC = max_conc * coef(lm_AUC)[2] + coef(lm_AUC)[1],
|
|
|
- R_Squared_L = summary(lm_L)$r.squared,
|
|
|
- R_Squared_K = summary(lm_K)$r.squared,
|
|
|
- R_Squared_r = summary(lm_r)$r.squared,
|
|
|
- R_Squared_AUC = summary(lm_AUC)$r.squared,
|
|
|
- lm_intercept_L = coef(lm_L)[1],
|
|
|
- lm_slope_L = coef(lm_L)[2],
|
|
|
- lm_intercept_K = coef(lm_K)[1],
|
|
|
- lm_slope_K = coef(lm_K)[2],
|
|
|
- lm_intercept_r = coef(lm_r)[1],
|
|
|
- lm_slope_r = coef(lm_r)[2],
|
|
|
- lm_intercept_AUC = coef(lm_AUC)[1],
|
|
|
- lm_slope_AUC = coef(lm_AUC)[2],
|
|
|
+ lm_Score_L = max(conc_num) * coef(lm(Zscore_L ~ conc_num))[2] + coef(lm(Zscore_L ~ conc_num))[1],
|
|
|
+ lm_Score_K = max(conc_num) * coef(lm(Zscore_K ~ conc_num))[2] + coef(lm(Zscore_K ~ conc_num))[1],
|
|
|
+ lm_Score_r = max(conc_num) * coef(lm(Zscore_r ~ conc_num))[2] + coef(lm(Zscore_r ~ conc_num))[1],
|
|
|
+ lm_Score_AUC = max(conc_num) * coef(lm(Zscore_AUC ~ conc_num))[2] + coef(lm(Zscore_AUC ~ conc_num))[1],
|
|
|
+ R_Squared_L = summary(lm(Zscore_L ~ conc_num))$r.squared,
|
|
|
+ R_Squared_K = summary(lm(Zscore_K ~ conc_num))$r.squared,
|
|
|
+ R_Squared_r = summary(lm(Zscore_r ~ conc_num))$r.squared,
|
|
|
+ R_Squared_AUC = summary(lm(Zscore_AUC ~ conc_num))$r.squared,
|
|
|
+ lm_intercept_L = coef(lm(Zscore_L ~ conc_num))[1],
|
|
|
+ lm_slope_L = coef(lm(Zscore_L ~ conc_num))[2],
|
|
|
+ lm_intercept_K = coef(lm(Zscore_K ~ conc_num))[1],
|
|
|
+ lm_slope_K = coef(lm(Zscore_K ~ conc_num))[2],
|
|
|
+ lm_intercept_r = coef(lm(Zscore_r ~ conc_num))[1],
|
|
|
+ lm_slope_r = coef(lm(Zscore_r ~ conc_num))[2],
|
|
|
+ lm_intercept_AUC = coef(lm(Zscore_AUC ~ conc_num))[1],
|
|
|
+ lm_slope_AUC = coef(lm(Zscore_AUC ~ conc_num))[2],
|
|
|
NG = sum(NG, na.rm = TRUE),
|
|
|
DB = sum(DB, na.rm = TRUE),
|
|
|
SM = sum(SM, na.rm = TRUE),
|
|
|
.groups = "keep"
|
|
|
)
|
|
|
|
|
|
+ # Join the summary data back to the original data
|
|
|
+ cleaned_interactions <- interactions %>%
|
|
|
+ select(-any_of(intersect(names(interactions), names(summary_stats))))
|
|
|
+ interactions_joined <- left_join(cleaned_interactions, summary_stats, by = group_vars)
|
|
|
+ interactions_joined <- interactions_joined %>% distinct()
|
|
|
+
|
|
|
+ # Remove duplicate rows if necessary
|
|
|
+ interactions <- interactions %>% distinct()
|
|
|
+
|
|
|
num_non_removed_concs <- total_conc_num - sum(stats$DB, na.rm = TRUE) - 1
|
|
|
|
|
|
interactions <- interactions %>%
|
|
@@ -353,10 +367,12 @@ calculate_interaction_scores <- function(df, max_conc) {
|
|
|
"Zscore_L", "Zscore_K", "Zscore_r", "Zscore_AUC",
|
|
|
"NG", "SM", "DB")
|
|
|
|
|
|
- calculations_joined <- df %>% select(-any_of(setdiff(names(calculations), c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))))
|
|
|
+ calculations_joined <- df %>%
|
|
|
+ select(-any_of(intersect(names(df), names(calculations))))
|
|
|
calculations_joined <- left_join(calculations_joined, calculations, by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))
|
|
|
|
|
|
- interactions_joined <- df %>% select(-any_of(setdiff(names(interactions), c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))))
|
|
|
+ interactions_joined <- df %>%
|
|
|
+ select(-any_of(intersect(names(df), names(interactions))))
|
|
|
interactions_joined <- left_join(interactions_joined, interactions, by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))
|
|
|
|
|
|
return(list(calculations = calculations, interactions = interactions, interactions_joined = interactions_joined,
|
|
@@ -1234,7 +1250,7 @@ main <- function() {
|
|
|
|
|
|
# TODO trying out some parallelization
|
|
|
# future::plan(future::multicore, workers = parallel::detectCores())
|
|
|
- future::plan(future::multicore, workers = 3)
|
|
|
+ future::plan(future::multisession, workers = 3)
|
|
|
|
|
|
plot_configs <- list(
|
|
|
list(out_dir = out_dir_qc, filename = "L_vs_K_before_quality_control",
|
|
@@ -1257,19 +1273,10 @@ main <- function() {
|
|
|
plot_configs = delta_bg_outside_2sd_k_plot_configs)
|
|
|
)
|
|
|
|
|
|
- furrr::future_map(plot_configs, function(config) {
|
|
|
- generate_and_save_plots(config$out_dir, config$filename, config$plot_configs)
|
|
|
- }, .options = furrr_options(seed = TRUE))
|
|
|
-
|
|
|
- # generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots)
|
|
|
- # generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots)
|
|
|
- # generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots)
|
|
|
- # generate_and_save_plots(out_dir_qc, "plate_analysis", plate_analysis_plot_configs)
|
|
|
- # generate_and_save_plots(out_dir_qc, "plate_analysis_boxplots", plate_analysis_boxplot_configs)
|
|
|
- # generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros", plate_analysis_no_zeros_plot_configs)
|
|
|
- # generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros_boxplots", plate_analysis_no_zeros_boxplot_configs)
|
|
|
- # generate_and_save_plots(out_dir_qc, "L_vs_K_for_strains_2SD_outside_mean_K", l_outside_2sd_k_plots)
|
|
|
- # generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots)
|
|
|
+ # Parallel generation of quality control plots (currently disabled; the call below is commented out)
|
|
|
+ # furrr::future_map(plot_configs, function(config) {
|
|
|
+ # generate_and_save_plots(config$out_dir, config$filename, config$plot_configs)
|
|
|
+ # }, .options = furrr_options(seed = TRUE))
|
|
|
|
|
|
# Process background strains
|
|
|
bg_strains <- c("YDL227C")
|
|
@@ -1435,7 +1442,7 @@ main <- function() {
|
|
|
message("Filtering and reranking plots")
|
|
|
# Formerly X_NArm
|
|
|
zscores_interactions_filtered <- zscores_interactions_joined %>%
|
|
|
- filter(!is.na(Z_lm_L) | !is.na(Avg_Zscore_L)) %>%
|
|
|
+ filter(!is.na(Z_lm_L) & !is.na(Avg_Zscore_L)) %>%
|
|
|
mutate(
|
|
|
Overlap = case_when(
|
|
|
Z_lm_L >= 2 & Avg_Zscore_L >= 2 ~ "Deletion Enhancer Both",
|