From ed1e3cf7566a318c61f9d0b4272e82ae86f4a6dd Mon Sep 17 00:00:00 2001
From: Bryan Roessler
Date: Sun, 1 Sep 2024 02:31:49 -0400
Subject: [PATCH] Auto-commit: apps/r/calculate_interaction_zscores5.R

---
 .../apps/r/calculate_interaction_zscores5.R   | 95 ++++++++++++++++++-
 1 file changed, 93 insertions(+), 2 deletions(-)

diff --git a/workflow/apps/r/calculate_interaction_zscores5.R b/workflow/apps/r/calculate_interaction_zscores5.R
index 49183c71..16a1cf70 100644
--- a/workflow/apps/r/calculate_interaction_zscores5.R
+++ b/workflow/apps/r/calculate_interaction_zscores5.R
@@ -285,7 +285,6 @@ process_strains <- function(df, df_stats_by_l_within_2sd_k, strain, output_dir)
       }
     df_strains <- bind_rows(df, df_temp)
   }
-
   return(df_strains)
 }
 
@@ -412,7 +411,7 @@ generate_summary_plots <- function(df, output_dir) {
 generate_cpp_correlation_plots <- function(df_na_rm, lm_list, output_dir) {
   lm_summaries <- lapply(lm_list, summary)
   plot_titles <- c("Interaction L vs. Interaction K", "Interaction L vs. Interaction r", "Interaction L vs. Interaction AUC",
-    "Interaction K vs. Interaction r", "Interaction K vs. Interaction AUC", "Interaction r vs. Interaction AUC")
+    "Interaction K vs. Interaction r", "Interaction K vs. Interaction AUC", "Interaction r vs. Interaction AUC")
 
   plot_list <- lapply(seq_along(lm_list), function(i) {
     ggplot(df_na_rm, aes_string(x = names(lm_list)[i][1], y = names(lm_list)[i][2])) +
@@ -426,6 +425,93 @@ generate_cpp_correlation_plots <- function(df_na_rm, lm_list, output_dir) {
   save_plots("Correlation_CPPs", plot_list, output_dir)
 }
 
+
+# Replace NA values in the Avg_Zscore_* and Z_lm_* columns with 0.001, then add a
+# rank() column for each of the eight metrics. Returns the modified data frame.
+adjust_missing_and_rank <- function(df) {
+  df <- df %>%
+    mutate(
+      Avg_Zscore_L = ifelse(is.na(Avg_Zscore_L), 0.001, Avg_Zscore_L),
+      Avg_Zscore_K = ifelse(is.na(Avg_Zscore_K), 0.001, Avg_Zscore_K),
+      Avg_Zscore_r = ifelse(is.na(Avg_Zscore_r), 0.001, Avg_Zscore_r),
+      Avg_Zscore_AUC = ifelse(is.na(Avg_Zscore_AUC), 0.001, Avg_Zscore_AUC),
+      Z_lm_L = ifelse(is.na(Z_lm_L), 0.001, Z_lm_L),
+      Z_lm_K = ifelse(is.na(Z_lm_K), 0.001, Z_lm_K),
+      Z_lm_r = ifelse(is.na(Z_lm_r), 0.001, Z_lm_r),
+      Z_lm_AUC = ifelse(is.na(Z_lm_AUC), 0.001, Z_lm_AUC),
+      L_Rank = rank(Avg_Zscore_L),
+      K_Rank = rank(Avg_Zscore_K),
+      r_Rank = rank(Avg_Zscore_r),
+      AUC_Rank = rank(Avg_Zscore_AUC),
+      L_Rank_lm = rank(Z_lm_L),
+      K_Rank_lm = rank(Z_lm_K),
+      r_Rank_lm = rank(Z_lm_r),
+      AUC_Rank_lm = rank(Z_lm_AUC)
+    )
+  return(df)
+}
+
+# Build one rank-vs-Z-score scatter plot with the regions beyond +/- sd_threshold
+# shaded. rank_var and zscore_var are tidy-eval column references: pass bare
+# column names, or inject with `!!sym("name")` when the name is held in a string.
+generate_ranked_plot <- function(df, rank_var, zscore_var, sd_threshold, title_prefix) {
+  ggplot(df, aes(x = {{rank_var}}, y = {{zscore_var}})) +
+    ggtitle(paste(title_prefix, "above", sd_threshold, "SD")) +
+    xlab("Rank") + ylab(paste("Avg Z score", title_prefix)) +
+    annotate("rect", xmin = -Inf, xmax = Inf, ymin = sd_threshold, ymax = Inf, fill = "#542788", alpha = 0.3) +
+    annotate("rect", xmin = -Inf, xmax = Inf, ymin = -sd_threshold, ymax = -Inf, fill = "orange", alpha = 0.3) +
+    geom_hline(yintercept = c(-sd_threshold, sd_threshold)) +
+    geom_point(size = 0.1, shape = 3) +
+    theme_publication()
+}
+
+# Write every rank plot (8 metrics x SD thresholds 1, 2, 3) into one PDF named after prefix
+generate_and_save_ranked_plots <- function(df, output_dir, prefix) {
+  rank_metrics <- list(
+    list("L_Rank", "Avg_Zscore_L", "L"),
+    list("K_Rank", "Avg_Zscore_K", "K"),
+    list("r_Rank", "Avg_Zscore_r", "r"),
+    list("AUC_Rank", "Avg_Zscore_AUC", "AUC"),
+    list("L_Rank_lm", "Z_lm_L", "L"),
+    list("K_Rank_lm", "Z_lm_K", "K"),
+    list("r_Rank_lm", "Z_lm_r", "r"),
+    list("AUC_Rank_lm", "Z_lm_AUC", "AUC")
+  )
+
+  pdf(file.path(output_dir, paste0(prefix, ".pdf")), width = 18, height = 12, onefile = TRUE)
+  # Make sure the PDF device is closed if a plot fails part-way through
+  device_open <- TRUE
+  on.exit(if (device_open) dev.off(), add = TRUE)
+
+  for (sd_threshold in c(1, 2, 3)) {
+    for (metric in rank_metrics) {
+      # `{{ }}` in generate_ranked_plot() captures the argument expression, so inject
+      # the column symbol with `!!`; a plain sym() call is not a valid aesthetic
+      plot <- generate_ranked_plot(df, !!sym(metric[[1]]), !!sym(metric[[2]]), sd_threshold, metric[[3]])
+      print(plot)
+    }
+  }
+
+  device_open <- FALSE
+  dev.off()
+}
+
+# Fill NAs and rank the interaction Z-scores, then write the rank-plot PDFs to output_dir.
+# NOTE(review): all four calls below receive the same data frame and metric list, so the
+# four PDFs are identical apart from their file names - confirm whether the _lm / _naRM
+# variants were meant to use different metrics or NA handling.
+create_ranked_plots <- function(df, output_dir) {
+  df_adjusted <- adjust_missing_and_rank(df)
+
+  # Generate and save ranked plots
+  generate_and_save_ranked_plots(df_adjusted, output_dir, "RankPlots")
+  generate_and_save_ranked_plots(df_adjusted, output_dir, "RankPlots_lm")
+  generate_and_save_ranked_plots(df_adjusted, output_dir, "RankPlots_naRM")
+  generate_and_save_ranked_plots(df_adjusted, output_dir, "RankPlots_lm_naRM")
+}
+
+
+
 main <- function() {
   # Applying to all experiments
   lapply(names(args$experiments), function(exp_name) {
@@ -642,7 +728,12 @@ main <- function() {
         lm(Z_lm_AUC ~ Z_lm_K, data = zscores_interactions_filtered),
         lm(Z_lm_AUC ~ Z_lm_r, data = zscores_interactions_filtered)
       )
+
+      # Generate cpp correlation plots
       generate_cpp_correlation_plots(zscores_interactions_filtered, lm_list, out_dir)
+
+      # Generate ranked plots
+      create_ranked_plots(zscores_interactions_filtered, out_dir)
     })
   })
 }