Browse Source

Fix rank column names

Bryan Roessler 7 tháng trước cách đây
mục cha
commit
b4875430f8

+ 71 - 25
qhtcp-workflow/apps/r/calculate_interaction_zscores.R

@@ -6,6 +6,16 @@ suppressMessages({
   library("rlang")
   library("rlang")
   library("ggthemes")
   library("ggthemes")
   library("data.table")
   library("data.table")
+  library("future")
+  library("furrr")
+  library("purrr")
+})
+
+# These parallelization libraries are very noisy
+suppressPackageStartupMessages({
+  library("future")
+  library("furrr")
+  library("purrr")
 })
 })
 
 
 options(warn = 2)
 options(warn = 2)
@@ -353,8 +363,8 @@ calculate_interaction_scores <- function(df, max_conc) {
     calculations_joined = calculations_joined))
     calculations_joined = calculations_joined))
 }
 }
 
 
-generate_and_save_plots <- function(out_dir, file_name, plot_configs, grid_layout = NULL) {
-  message("Generating ", file_name, ".pdf and ", file_name, ".html")
+generate_and_save_plots <- function(out_dir, filename, plot_configs, grid_layout = NULL) {
+  message("Generating ", filename, ".pdf and ", filename, ".html")
 
 
   # Prepare lists to collect plots
   # Prepare lists to collect plots
   static_plots <- list()
   static_plots <- list()
@@ -441,7 +451,7 @@ generate_and_save_plots <- function(out_dir, file_name, plot_configs, grid_layou
   }
   }
 
 
   # Save static PDF plots
   # Save static PDF plots
-  pdf(file.path(out_dir, paste0(file_name, ".pdf")), width = 14, height = 9)
+  pdf(file.path(out_dir, paste0(filename, ".pdf")), width = 14, height = 9)
   lapply(static_plots, print)
   lapply(static_plots, print)
   dev.off()
   dev.off()
 
 
@@ -456,7 +466,7 @@ generate_and_save_plots <- function(out_dir, file_name, plot_configs, grid_layou
     },
     },
     margin = 0.05
     margin = 0.05
   )
   )
-  saveWidget(combined_plot, file = file.path(out_dir, paste0(file_name, ".html")), selfcontained = TRUE)
+  saveWidget(combined_plot, file = file.path(out_dir, paste0(filename, ".html")), selfcontained = TRUE)
 }
 }
 
 
 generate_scatter_plot <- function(plot, config) {
 generate_scatter_plot <- function(plot, config) {
@@ -971,6 +981,8 @@ process_data <- function(df, variables, filter_nf = FALSE, filter_na = FALSE, ad
 
 
   avg_zscore_cols <- paste0("Avg_Zscore_", variables)
   avg_zscore_cols <- paste0("Avg_Zscore_", variables)
   z_lm_cols <- paste0("Z_lm_", variables)
   z_lm_cols <- paste0("Z_lm_", variables)
+  rank_avg_zscore_cols <- paste0("Rank_", variables)
+  rank_z_lm_cols <- paste0("Rank_lm_", variables)
 
 
   if (filter_nf) {
   if (filter_nf) {
     message("Filtering non-finite values")
     message("Filtering non-finite values")
@@ -1008,10 +1020,14 @@ process_data <- function(df, variables, filter_nf = FALSE, filter_na = FALSE, ad
   # TODO probably should be moved to separate function
   # TODO probably should be moved to separate function
   if (rank) {
   if (rank) {
     message("Calculating ranks for Avg_Zscore_ and Z_lm_ columns")
     message("Calculating ranks for Avg_Zscore_ and Z_lm_ columns")
+
+    rank_col_mapping <- setNames(rank_avg_zscore_cols, avg_zscore_cols)
     df <- df %>%
     df <- df %>%
-      mutate(across(all_of(avg_zscore_cols), ~rank(., na.last = "keep"), .names = "Rank_{.col}"))
+      mutate(across(all_of(avg_zscore_cols), ~rank(., na.last = "keep"), .names = "{rank_col_mapping[.col]}"))
+
+    rank_lm_col_mapping <- setNames(rank_z_lm_cols, z_lm_cols)
     df <- df %>%
     df <- df %>%
-      mutate(across(all_of(z_lm_cols), ~rank(., na.last = "keep"), .names = "Rank_lm_{.col}"))
+      mutate(across(all_of(z_lm_cols), ~rank(., na.last = "keep"), .names = "{rank_lm_col_mapping[.col]}"))
   }
   }
 
 
   return(df)
   return(df)
@@ -1096,7 +1112,7 @@ main <- function() {
       row.names = FALSE)
       row.names = FALSE)
     
     
     # Each plots list corresponds to a file
     # Each plots list corresponds to a file
-    l_vs_k_plots <- list(
+    l_vs_k_plot_configs <- list(
       list(
       list(
         df = df,
         df = df,
         x_var = "L",
         x_var = "L",
@@ -1110,7 +1126,7 @@ main <- function() {
       )
       )
     )
     )
 
 
-    frequency_delta_bg_plots <- list(
+    frequency_delta_bg_plot_configs <- list(
       list(
       list(
         df = df_filtered_stats,
         df = df_filtered_stats,
         x_var = "delta_bg",
         x_var = "delta_bg",
@@ -1135,7 +1151,7 @@ main <- function() {
         legend_position = "right")
         legend_position = "right")
     )
     )
 
 
-    above_threshold_plots <- list(
+    above_threshold_plot_configs <- list(
       list(
       list(
         df = df_above_tolerance,
         df = df_above_tolerance,
         x_var = "L",
         x_var = "L",
@@ -1184,7 +1200,7 @@ main <- function() {
       plot_type = "box"
       plot_type = "box"
     )
     )
 
 
-    l_outside_2sd_k_plots <- list(
+    l_outside_2sd_k_plot_configs <- list(
       list(
       list(
         df = df_na_l_outside_2sd_k_stats,
         df = df_na_l_outside_2sd_k_stats,
         x_var = "L",
         x_var = "L",
@@ -1198,7 +1214,7 @@ main <- function() {
       )
       )
     )
     )
 
 
-    delta_bg_outside_2sd_k_plots <- list(
+    delta_bg_outside_2sd_k_plot_configs <- list(
       list(
       list(
         df = df_na_l_outside_2sd_k_stats,
         df = df_na_l_outside_2sd_k_stats,
         x_var = "delta_bg",
         x_var = "delta_bg",
@@ -1213,15 +1229,45 @@ main <- function() {
     )
     )
 
 
     message("Generating quality control plots")
     message("Generating quality control plots")
-    generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots)
-    generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots)
-    generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots)
-    generate_and_save_plots(out_dir_qc, "plate_analysis", plate_analysis_plot_configs)
-    generate_and_save_plots(out_dir_qc, "plate_analysis_boxplots", plate_analysis_boxplot_configs)
-    generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros", plate_analysis_no_zeros_plot_configs)
-    generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros_boxplots", plate_analysis_no_zeros_boxplot_configs)
-    generate_and_save_plots(out_dir_qc, "L_vs_K_for_strains_2SD_outside_mean_K", l_outside_2sd_k_plots)
-    generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots)
+
+    # TODO trying out some parallelization
+    # future::plan(future::multicore, workers = parallel::detectCores())
+    future::plan(future::multicore, workers = 3)
+
+    plot_configs <- list(
+      list(out_dir = out_dir_qc, filename = "L_vs_K_before_quality_control",
+        plot_configs = l_vs_k_plot_configs),
+      list(out_dir = out_dir_qc, filename = "frequency_delta_background",
+        plot_configs = frequency_delta_bg_plot_configs),
+      list(out_dir = out_dir_qc, filename = "L_vs_K_above_threshold",
+        plot_configs = above_threshold_plot_configs),
+      list(out_dir = out_dir_qc, filename = "plate_analysis",
+        plot_configs = plate_analysis_plot_configs),
+      list(out_dir = out_dir_qc, filename = "plate_analysis_boxplots",
+        plot_configs = plate_analysis_boxplot_configs),
+      list(out_dir = out_dir_qc, filename = "plate_analysis_no_zeros",
+        plot_configs = plate_analysis_no_zeros_plot_configs),
+      list(out_dir = out_dir_qc, filename = "plate_analysis_no_zeros_boxplots",
+        plot_configs = plate_analysis_no_zeros_boxplot_configs),
+      list(out_dir = out_dir_qc, name = "L_vs_K_for_strains_2SD_outside_mean_K",
+        plot_configs = l_outside_2sd_k_plot_configs),
+      list(out_dir = out_dir_qc, name = "delta_background_vs_K_for_strains_2sd_outside_mean_K",
+        plot_configs = delta_bg_outside_2sd_k_plot_configs)
+    )
+
+    furrr::future_map(plot_configs, function(config) {
+      generate_and_save_plots(config$out_dir, config$filename, config$plot_configs)
+    }, .options = furrr_options(seed = TRUE))
+
+    # generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots)
+    # generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots)
+    # generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots)
+    # generate_and_save_plots(out_dir_qc, "plate_analysis", plate_analysis_plot_configs)
+    # generate_and_save_plots(out_dir_qc, "plate_analysis_boxplots", plate_analysis_boxplot_configs)
+    # generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros", plate_analysis_no_zeros_plot_configs)
+    # generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros_boxplots", plate_analysis_no_zeros_boxplot_configs)
+    # generate_and_save_plots(out_dir_qc, "L_vs_K_for_strains_2SD_outside_mean_K", l_outside_2sd_k_plots)
+    # generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots)
 
 
     # Process background strains
     # Process background strains
     bg_strains <- c("YDL227C")
     bg_strains <- c("YDL227C")
@@ -1372,7 +1418,7 @@ main <- function() {
         variables = interaction_vars,
         variables = interaction_vars,
         is_lm = FALSE
         is_lm = FALSE
       )
       )
-      generate_and_save_plots(out_dir = out_dir, file_name = "RankPlots",
+      generate_and_save_plots(out_dir = out_dir, filename = "RankPlots",
         plot_configs = rank_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
         plot_configs = rank_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
 
 
       message("Generating ranked linear model plots")
       message("Generating ranked linear model plots")
@@ -1381,7 +1427,7 @@ main <- function() {
         variables = interaction_vars,
         variables = interaction_vars,
         is_lm = TRUE
         is_lm = TRUE
       )
       )
-      generate_and_save_plots(out_dir = out_dir, file_name = "RankPlots_lm",
+      generate_and_save_plots(out_dir = out_dir, filename = "RankPlots_lm",
         plot_configs = rank_lm_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
         plot_configs = rank_lm_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
 
 
       message("Filtering and reranking plots")
       message("Filtering and reranking plots")
@@ -1422,7 +1468,7 @@ main <- function() {
       message("Generating filtered ranked plots")
       message("Generating filtered ranked plots")
       generate_and_save_plots(
       generate_and_save_plots(
         out_dir = out_dir,
         out_dir = out_dir,
-        file_name = "RankPlots_na_rm",
+        filename = "RankPlots_na_rm",
         plot_configs = rank_plot_filtered_configs,
         plot_configs = rank_plot_filtered_configs,
         grid_layout = list(ncol = 3, nrow = 2))
         grid_layout = list(ncol = 3, nrow = 2))
 
 
@@ -1434,7 +1480,7 @@ main <- function() {
       )
       )
       generate_and_save_plots(
       generate_and_save_plots(
         out_dir = out_dir,
         out_dir = out_dir,
-        file_name = "RankPlots_lm_na_rm",
+        filename = "RankPlots_lm_na_rm",
         plot_configs = rank_plot_lm_filtered_configs,
         plot_configs = rank_plot_lm_filtered_configs,
         grid_layout = list(ncol = 3, nrow = 2))
         grid_layout = list(ncol = 3, nrow = 2))
 
 
@@ -1442,7 +1488,7 @@ main <- function() {
       correlation_plot_configs <- generate_correlation_plot_configs(zscores_interactions_filtered)
       correlation_plot_configs <- generate_correlation_plot_configs(zscores_interactions_filtered)
       generate_and_save_plots(
       generate_and_save_plots(
         out_dir = out_dir,
         out_dir = out_dir,
-        file_name = "Avg_Zscore_vs_lm_NA_rm",
+        filename = "Avg_Zscore_vs_lm_NA_rm",
         plot_configs = correlation_plot_configs,
         plot_configs = correlation_plot_configs,
         grid_layout = list(ncol = 2, nrow = 2))
         grid_layout = list(ncol = 2, nrow = 2))
     })
     })

+ 1 - 1
qhtcp-workflow/qhtcp-workflow

@@ -635,7 +635,7 @@ install_dependencies() {
   depends_r=(
   depends_r=(
     BiocManager ontologyIndex ggrepel tidyverse sos openxlsx ggplot2 
     BiocManager ontologyIndex ggrepel tidyverse sos openxlsx ggplot2 
     dplyr rlang data.table unix gridExtra gplots stringr plotly ggthemes pandoc
     dplyr rlang data.table unix gridExtra gplots stringr plotly ggthemes pandoc
-    rmarkdown htmlwidgets gdata Hmisc)
+    rmarkdown htmlwidgets gdata Hmisc future furrr)
   depends_bioc=(UCSC.utils org.Sc.sgd.db)
   depends_bioc=(UCSC.utils org.Sc.sgd.db)
 
 
   [[ $1 == "--get-depends" ]] && return 0 # if we just want to read the depends vars
   [[ $1 == "--get-depends" ]] && return 0 # if we just want to read the depends vars