diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 68fed015..f0226dfb 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -6,6 +6,16 @@ suppressMessages({ library("rlang") library("ggthemes") library("data.table") + library("future") + library("furrr") + library("purrr") +}) + +# These parallelization libraries are very noisy +suppressPackageStartupMessages({ + library("future") + library("furrr") + library("purrr") }) options(warn = 2) @@ -353,8 +363,8 @@ calculate_interaction_scores <- function(df, max_conc) { calculations_joined = calculations_joined)) } -generate_and_save_plots <- function(out_dir, file_name, plot_configs, grid_layout = NULL) { - message("Generating ", file_name, ".pdf and ", file_name, ".html") +generate_and_save_plots <- function(out_dir, filename, plot_configs, grid_layout = NULL) { + message("Generating ", filename, ".pdf and ", filename, ".html") # Prepare lists to collect plots static_plots <- list() @@ -441,7 +451,7 @@ generate_and_save_plots <- function(out_dir, file_name, plot_configs, grid_layou } # Save static PDF plots - pdf(file.path(out_dir, paste0(file_name, ".pdf")), width = 14, height = 9) + pdf(file.path(out_dir, paste0(filename, ".pdf")), width = 14, height = 9) lapply(static_plots, print) dev.off() @@ -456,7 +466,7 @@ generate_and_save_plots <- function(out_dir, file_name, plot_configs, grid_layou }, margin = 0.05 ) - saveWidget(combined_plot, file = file.path(out_dir, paste0(file_name, ".html")), selfcontained = TRUE) + saveWidget(combined_plot, file = file.path(out_dir, paste0(filename, ".html")), selfcontained = TRUE) } generate_scatter_plot <- function(plot, config) { @@ -971,6 +981,8 @@ process_data <- function(df, variables, filter_nf = FALSE, filter_na = FALSE, ad avg_zscore_cols <- paste0("Avg_Zscore_", variables) z_lm_cols <- paste0("Z_lm_", variables) + rank_avg_zscore_cols <- paste0("Rank_", variables) + rank_z_lm_cols <- paste0("Rank_lm_", variables) if (filter_nf) { message("Filtering non-finite values") @@ -1008,10 +1020,14 @@ process_data <- function(df, variables, filter_nf = FALSE, filter_na = FALSE, ad # TODO probably should be moved to separate function if (rank) { message("Calculating ranks for Avg_Zscore_ and Z_lm_ columns") + + rank_col_mapping <- setNames(rank_avg_zscore_cols, avg_zscore_cols) df <- df %>% - mutate(across(all_of(avg_zscore_cols), ~rank(., na.last = "keep"), .names = "Rank_{.col}")) + mutate(across(all_of(avg_zscore_cols), ~rank(., na.last = "keep"), .names = "{rank_col_mapping[.col]}")) + + rank_lm_col_mapping <- setNames(rank_z_lm_cols, z_lm_cols) df <- df %>% - mutate(across(all_of(z_lm_cols), ~rank(., na.last = "keep"), .names = "Rank_lm_{.col}")) + mutate(across(all_of(z_lm_cols), ~rank(., na.last = "keep"), .names = "{rank_lm_col_mapping[.col]}")) } return(df) @@ -1096,7 +1112,7 @@ main <- function() { row.names = FALSE) # Each plots list corresponds to a file - l_vs_k_plots <- list( + l_vs_k_plot_configs <- list( list( df = df, x_var = "L", @@ -1110,7 +1126,7 @@ main <- function() { ) ) - frequency_delta_bg_plots <- list( + frequency_delta_bg_plot_configs <- list( list( df = df_filtered_stats, x_var = "delta_bg", @@ -1135,7 +1151,7 @@ main <- function() { legend_position = "right") ) - above_threshold_plots <- list( + above_threshold_plot_configs <- list( list( df = df_above_tolerance, x_var = "L", @@ -1184,7 +1200,7 @@ main <- function() { plot_type = "box" ) - l_outside_2sd_k_plots <- list( + l_outside_2sd_k_plot_configs <- list( list( df = df_na_l_outside_2sd_k_stats, x_var = "L", @@ -1198,7 +1214,7 @@ main <- function() { ) ) - delta_bg_outside_2sd_k_plots <- list( + delta_bg_outside_2sd_k_plot_configs <- list( list( df = df_na_l_outside_2sd_k_stats, x_var = "delta_bg", @@ -1213,15 +1229,45 @@ main <- function() { ) message("Generating quality control plots") - generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots) - generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots) - generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots) - generate_and_save_plots(out_dir_qc, "plate_analysis", plate_analysis_plot_configs) - generate_and_save_plots(out_dir_qc, "plate_analysis_boxplots", plate_analysis_boxplot_configs) - generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros", plate_analysis_no_zeros_plot_configs) - generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros_boxplots", plate_analysis_no_zeros_boxplot_configs) - generate_and_save_plots(out_dir_qc, "L_vs_K_for_strains_2SD_outside_mean_K", l_outside_2sd_k_plots) - generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots) + + # TODO trying out some parallelization + # future::plan(future::multicore, workers = parallel::detectCores()) + future::plan(future::multicore, workers = 3) + + plot_configs <- list( + list(out_dir = out_dir_qc, filename = "L_vs_K_before_quality_control", + plot_configs = l_vs_k_plot_configs), + list(out_dir = out_dir_qc, filename = "frequency_delta_background", + plot_configs = frequency_delta_bg_plot_configs), + list(out_dir = out_dir_qc, filename = "L_vs_K_above_threshold", + plot_configs = above_threshold_plot_configs), + list(out_dir = out_dir_qc, filename = "plate_analysis", + plot_configs = plate_analysis_plot_configs), + list(out_dir = out_dir_qc, filename = "plate_analysis_boxplots", + plot_configs = plate_analysis_boxplot_configs), + list(out_dir = out_dir_qc, filename = "plate_analysis_no_zeros", + plot_configs = plate_analysis_no_zeros_plot_configs), + list(out_dir = out_dir_qc, filename = "plate_analysis_no_zeros_boxplots", + plot_configs = plate_analysis_no_zeros_boxplot_configs), + list(out_dir = out_dir_qc, name = "L_vs_K_for_strains_2SD_outside_mean_K", + plot_configs = l_outside_2sd_k_plot_configs), + list(out_dir = out_dir_qc, name = "delta_background_vs_K_for_strains_2sd_outside_mean_K", + plot_configs = delta_bg_outside_2sd_k_plot_configs) + ) + + furrr::future_map(plot_configs, function(config) { + generate_and_save_plots(config$out_dir, config$filename, config$plot_configs) + }, .options = furrr_options(seed = TRUE)) + + # generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots) + # generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots) + # generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots) + # generate_and_save_plots(out_dir_qc, "plate_analysis", plate_analysis_plot_configs) + # generate_and_save_plots(out_dir_qc, "plate_analysis_boxplots", plate_analysis_boxplot_configs) + # generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros", plate_analysis_no_zeros_plot_configs) + # generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros_boxplots", plate_analysis_no_zeros_boxplot_configs) + # generate_and_save_plots(out_dir_qc, "L_vs_K_for_strains_2SD_outside_mean_K", l_outside_2sd_k_plots) + # generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots) # Process background strains bg_strains <- c("YDL227C") @@ -1372,7 +1418,7 @@ main <- function() { variables = interaction_vars, is_lm = FALSE ) - generate_and_save_plots(out_dir = out_dir, file_name = "RankPlots", + generate_and_save_plots(out_dir = out_dir, filename = "RankPlots", plot_configs = rank_plot_configs, grid_layout = list(ncol = 3, nrow = 2)) message("Generating ranked linear model plots") @@ -1381,7 +1427,7 @@ main <- function() { variables = interaction_vars, is_lm = TRUE ) - generate_and_save_plots(out_dir = out_dir, file_name = "RankPlots_lm", + generate_and_save_plots(out_dir = out_dir, filename = "RankPlots_lm", plot_configs = rank_lm_plot_configs, grid_layout = list(ncol = 3, nrow = 2)) message("Filtering and reranking plots") @@ -1422,7 +1468,7 @@ main <- function() { message("Generating filtered ranked plots") generate_and_save_plots( out_dir = out_dir, - file_name = "RankPlots_na_rm", + filename = "RankPlots_na_rm", plot_configs = rank_plot_filtered_configs, grid_layout = list(ncol = 3, nrow = 2)) @@ -1434,7 +1480,7 @@ main <- function() { ) generate_and_save_plots( out_dir = out_dir, - file_name = "RankPlots_lm_na_rm", + filename = "RankPlots_lm_na_rm", plot_configs = rank_plot_lm_filtered_configs, grid_layout = list(ncol = 3, nrow = 2)) @@ -1442,7 +1488,7 @@ main <- function() { correlation_plot_configs <- generate_correlation_plot_configs(zscores_interactions_filtered) generate_and_save_plots( out_dir = out_dir, - file_name = "Avg_Zscore_vs_lm_NA_rm", + filename = "Avg_Zscore_vs_lm_NA_rm", plot_configs = correlation_plot_configs, grid_layout = list(ncol = 2, nrow = 2)) }) diff --git a/qhtcp-workflow/qhtcp-workflow b/qhtcp-workflow/qhtcp-workflow index 608cae02..c6225ee9 100755 --- a/qhtcp-workflow/qhtcp-workflow +++ b/qhtcp-workflow/qhtcp-workflow @@ -635,7 +635,7 @@ install_dependencies() { depends_r=( BiocManager ontologyIndex ggrepel tidyverse sos openxlsx ggplot2 dplyr rlang data.table unix gridExtra gplots stringr plotly ggthemes pandoc - rmarkdown htmlwidgets gdata Hmisc) + rmarkdown htmlwidgets gdata Hmisc future furrr) depends_bioc=(UCSC.utils org.Sc.sgd.db) [[ $1 == "--get-depends" ]] && return 0 # if we just want to read the depends vars