Fix rank column names

This commit is contained in:
2024-09-18 15:42:09 -04:00
parent 452d9e4162
commit b4875430f8
2 changed files with 72 additions and 26 deletions

View File

@@ -6,6 +6,16 @@ suppressMessages({
library("rlang") library("rlang")
library("ggthemes") library("ggthemes")
library("data.table") library("data.table")
library("future")
library("furrr")
library("purrr")
})
# These parallelization libraries are very noisy
suppressPackageStartupMessages({
library("future")
library("furrr")
library("purrr")
}) })
options(warn = 2) options(warn = 2)
@@ -353,8 +363,8 @@ calculate_interaction_scores <- function(df, max_conc) {
calculations_joined = calculations_joined)) calculations_joined = calculations_joined))
} }
generate_and_save_plots <- function(out_dir, file_name, plot_configs, grid_layout = NULL) { generate_and_save_plots <- function(out_dir, filename, plot_configs, grid_layout = NULL) {
message("Generating ", file_name, ".pdf and ", file_name, ".html") message("Generating ", filename, ".pdf and ", filename, ".html")
# Prepare lists to collect plots # Prepare lists to collect plots
static_plots <- list() static_plots <- list()
@@ -441,7 +451,7 @@ generate_and_save_plots <- function(out_dir, file_name, plot_configs, grid_layou
} }
# Save static PDF plots # Save static PDF plots
pdf(file.path(out_dir, paste0(file_name, ".pdf")), width = 14, height = 9) pdf(file.path(out_dir, paste0(filename, ".pdf")), width = 14, height = 9)
lapply(static_plots, print) lapply(static_plots, print)
dev.off() dev.off()
@@ -456,7 +466,7 @@ generate_and_save_plots <- function(out_dir, file_name, plot_configs, grid_layou
}, },
margin = 0.05 margin = 0.05
) )
saveWidget(combined_plot, file = file.path(out_dir, paste0(file_name, ".html")), selfcontained = TRUE) saveWidget(combined_plot, file = file.path(out_dir, paste0(filename, ".html")), selfcontained = TRUE)
} }
generate_scatter_plot <- function(plot, config) { generate_scatter_plot <- function(plot, config) {
@@ -971,6 +981,8 @@ process_data <- function(df, variables, filter_nf = FALSE, filter_na = FALSE, ad
avg_zscore_cols <- paste0("Avg_Zscore_", variables) avg_zscore_cols <- paste0("Avg_Zscore_", variables)
z_lm_cols <- paste0("Z_lm_", variables) z_lm_cols <- paste0("Z_lm_", variables)
rank_avg_zscore_cols <- paste0("Rank_", variables)
rank_z_lm_cols <- paste0("Rank_lm_", variables)
if (filter_nf) { if (filter_nf) {
message("Filtering non-finite values") message("Filtering non-finite values")
@@ -1008,10 +1020,14 @@ process_data <- function(df, variables, filter_nf = FALSE, filter_na = FALSE, ad
# TODO probably should be moved to separate function # TODO probably should be moved to separate function
if (rank) { if (rank) {
message("Calculating ranks for Avg_Zscore_ and Z_lm_ columns") message("Calculating ranks for Avg_Zscore_ and Z_lm_ columns")
rank_col_mapping <- setNames(rank_avg_zscore_cols, avg_zscore_cols)
df <- df %>% df <- df %>%
mutate(across(all_of(avg_zscore_cols), ~rank(., na.last = "keep"), .names = "Rank_{.col}")) mutate(across(all_of(avg_zscore_cols), ~rank(., na.last = "keep"), .names = "{rank_col_mapping[.col]}"))
rank_lm_col_mapping <- setNames(rank_z_lm_cols, z_lm_cols)
df <- df %>% df <- df %>%
mutate(across(all_of(z_lm_cols), ~rank(., na.last = "keep"), .names = "Rank_lm_{.col}")) mutate(across(all_of(z_lm_cols), ~rank(., na.last = "keep"), .names = "{rank_lm_col_mapping[.col]}"))
} }
return(df) return(df)
@@ -1096,7 +1112,7 @@ main <- function() {
row.names = FALSE) row.names = FALSE)
# Each plots list corresponds to a file # Each plots list corresponds to a file
l_vs_k_plots <- list( l_vs_k_plot_configs <- list(
list( list(
df = df, df = df,
x_var = "L", x_var = "L",
@@ -1110,7 +1126,7 @@ main <- function() {
) )
) )
frequency_delta_bg_plots <- list( frequency_delta_bg_plot_configs <- list(
list( list(
df = df_filtered_stats, df = df_filtered_stats,
x_var = "delta_bg", x_var = "delta_bg",
@@ -1135,7 +1151,7 @@ main <- function() {
legend_position = "right") legend_position = "right")
) )
above_threshold_plots <- list( above_threshold_plot_configs <- list(
list( list(
df = df_above_tolerance, df = df_above_tolerance,
x_var = "L", x_var = "L",
@@ -1184,7 +1200,7 @@ main <- function() {
plot_type = "box" plot_type = "box"
) )
l_outside_2sd_k_plots <- list( l_outside_2sd_k_plot_configs <- list(
list( list(
df = df_na_l_outside_2sd_k_stats, df = df_na_l_outside_2sd_k_stats,
x_var = "L", x_var = "L",
@@ -1198,7 +1214,7 @@ main <- function() {
) )
) )
delta_bg_outside_2sd_k_plots <- list( delta_bg_outside_2sd_k_plot_configs <- list(
list( list(
df = df_na_l_outside_2sd_k_stats, df = df_na_l_outside_2sd_k_stats,
x_var = "delta_bg", x_var = "delta_bg",
@@ -1213,15 +1229,45 @@ main <- function() {
) )
message("Generating quality control plots") message("Generating quality control plots")
generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots)
generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots) # TODO trying out some parallelization
generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots) # future::plan(future::multicore, workers = parallel::detectCores())
generate_and_save_plots(out_dir_qc, "plate_analysis", plate_analysis_plot_configs) future::plan(future::multicore, workers = 3)
generate_and_save_plots(out_dir_qc, "plate_analysis_boxplots", plate_analysis_boxplot_configs)
generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros", plate_analysis_no_zeros_plot_configs) plot_configs <- list(
generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros_boxplots", plate_analysis_no_zeros_boxplot_configs) list(out_dir = out_dir_qc, filename = "L_vs_K_before_quality_control",
generate_and_save_plots(out_dir_qc, "L_vs_K_for_strains_2SD_outside_mean_K", l_outside_2sd_k_plots) plot_configs = l_vs_k_plot_configs),
generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots) list(out_dir = out_dir_qc, filename = "frequency_delta_background",
plot_configs = frequency_delta_bg_plot_configs),
list(out_dir = out_dir_qc, filename = "L_vs_K_above_threshold",
plot_configs = above_threshold_plot_configs),
list(out_dir = out_dir_qc, filename = "plate_analysis",
plot_configs = plate_analysis_plot_configs),
list(out_dir = out_dir_qc, filename = "plate_analysis_boxplots",
plot_configs = plate_analysis_boxplot_configs),
list(out_dir = out_dir_qc, filename = "plate_analysis_no_zeros",
plot_configs = plate_analysis_no_zeros_plot_configs),
list(out_dir = out_dir_qc, filename = "plate_analysis_no_zeros_boxplots",
plot_configs = plate_analysis_no_zeros_boxplot_configs),
list(out_dir = out_dir_qc, name = "L_vs_K_for_strains_2SD_outside_mean_K",
plot_configs = l_outside_2sd_k_plot_configs),
list(out_dir = out_dir_qc, name = "delta_background_vs_K_for_strains_2sd_outside_mean_K",
plot_configs = delta_bg_outside_2sd_k_plot_configs)
)
furrr::future_map(plot_configs, function(config) {
generate_and_save_plots(config$out_dir, config$filename, config$plot_configs)
}, .options = furrr_options(seed = TRUE))
# generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots)
# generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots)
# generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots)
# generate_and_save_plots(out_dir_qc, "plate_analysis", plate_analysis_plot_configs)
# generate_and_save_plots(out_dir_qc, "plate_analysis_boxplots", plate_analysis_boxplot_configs)
# generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros", plate_analysis_no_zeros_plot_configs)
# generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros_boxplots", plate_analysis_no_zeros_boxplot_configs)
# generate_and_save_plots(out_dir_qc, "L_vs_K_for_strains_2SD_outside_mean_K", l_outside_2sd_k_plots)
# generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots)
# Process background strains # Process background strains
bg_strains <- c("YDL227C") bg_strains <- c("YDL227C")
@@ -1372,7 +1418,7 @@ main <- function() {
variables = interaction_vars, variables = interaction_vars,
is_lm = FALSE is_lm = FALSE
) )
generate_and_save_plots(out_dir = out_dir, file_name = "RankPlots", generate_and_save_plots(out_dir = out_dir, filename = "RankPlots",
plot_configs = rank_plot_configs, grid_layout = list(ncol = 3, nrow = 2)) plot_configs = rank_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
message("Generating ranked linear model plots") message("Generating ranked linear model plots")
@@ -1381,7 +1427,7 @@ main <- function() {
variables = interaction_vars, variables = interaction_vars,
is_lm = TRUE is_lm = TRUE
) )
generate_and_save_plots(out_dir = out_dir, file_name = "RankPlots_lm", generate_and_save_plots(out_dir = out_dir, filename = "RankPlots_lm",
plot_configs = rank_lm_plot_configs, grid_layout = list(ncol = 3, nrow = 2)) plot_configs = rank_lm_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
message("Filtering and reranking plots") message("Filtering and reranking plots")
@@ -1422,7 +1468,7 @@ main <- function() {
message("Generating filtered ranked plots") message("Generating filtered ranked plots")
generate_and_save_plots( generate_and_save_plots(
out_dir = out_dir, out_dir = out_dir,
file_name = "RankPlots_na_rm", filename = "RankPlots_na_rm",
plot_configs = rank_plot_filtered_configs, plot_configs = rank_plot_filtered_configs,
grid_layout = list(ncol = 3, nrow = 2)) grid_layout = list(ncol = 3, nrow = 2))
@@ -1434,7 +1480,7 @@ main <- function() {
) )
generate_and_save_plots( generate_and_save_plots(
out_dir = out_dir, out_dir = out_dir,
file_name = "RankPlots_lm_na_rm", filename = "RankPlots_lm_na_rm",
plot_configs = rank_plot_lm_filtered_configs, plot_configs = rank_plot_lm_filtered_configs,
grid_layout = list(ncol = 3, nrow = 2)) grid_layout = list(ncol = 3, nrow = 2))
@@ -1442,7 +1488,7 @@ main <- function() {
correlation_plot_configs <- generate_correlation_plot_configs(zscores_interactions_filtered) correlation_plot_configs <- generate_correlation_plot_configs(zscores_interactions_filtered)
generate_and_save_plots( generate_and_save_plots(
out_dir = out_dir, out_dir = out_dir,
file_name = "Avg_Zscore_vs_lm_NA_rm", filename = "Avg_Zscore_vs_lm_NA_rm",
plot_configs = correlation_plot_configs, plot_configs = correlation_plot_configs,
grid_layout = list(ncol = 2, nrow = 2)) grid_layout = list(ncol = 2, nrow = 2))
}) })

View File

@@ -635,7 +635,7 @@ install_dependencies() {
depends_r=( depends_r=(
BiocManager ontologyIndex ggrepel tidyverse sos openxlsx ggplot2 BiocManager ontologyIndex ggrepel tidyverse sos openxlsx ggplot2
dplyr rlang data.table unix gridExtra gplots stringr plotly ggthemes pandoc dplyr rlang data.table unix gridExtra gplots stringr plotly ggthemes pandoc
rmarkdown htmlwidgets gdata Hmisc) rmarkdown htmlwidgets gdata Hmisc future furrr)
depends_bioc=(UCSC.utils org.Sc.sgd.db) depends_bioc=(UCSC.utils org.Sc.sgd.db)
[[ $1 == "--get-depends" ]] && return 0 # if we just want to read the depends vars [[ $1 == "--get-depends" ]] && return 0 # if we just want to read the depends vars