From 9c48b25b4257bf3ea21955771ac0b75e783c1b3d Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Sun, 1 Sep 2024 02:05:15 -0400 Subject: [PATCH] Auto-commit: apps/r/calculate_interaction_zscores5.R --- .../apps/r/calculate_interaction_zscores5.R | 1610 ++++++++--------- 1 file changed, 747 insertions(+), 863 deletions(-) diff --git a/workflow/apps/r/calculate_interaction_zscores5.R b/workflow/apps/r/calculate_interaction_zscores5.R index 9d67bd20..49183c71 100644 --- a/workflow/apps/r/calculate_interaction_zscores5.R +++ b/workflow/apps/r/calculate_interaction_zscores5.R @@ -205,7 +205,7 @@ generate_and_save_plots <- function(df, output_dir, prefix, variables, include_q } # Calculate summary statistics for all variables together -calculate_summary_stats <- function(df, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor")) { +calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "conc_num_factor")) { summary_stats <- df %>% group_by(across(all_of(group_vars))) %>% summarise(across(all_of(variables), list( @@ -289,13 +289,12 @@ process_strains <- function(df, df_stats_by_l_within_2sd_k, strain, output_dir) return(df_strains) } - calculate_interaction_scores <- function(df, df_stats_by_l, df_stats_by_k, - df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables) { + df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) { # Calculate all necessary statistics and shifts in one step - df_stats_interaction_all <- df %>% - group_by(OrfRep, Gene, num, conc_num, conc_num_factor) %>% + interaction_scores_all <- df %>% + group_by(across(all_of(group_vars)), conc_num, conc_num_factor) %>% summarise( N = n(), across(all_of(variables), list(mean = mean, sd = sd), na.rm = TRUE), @@ -333,8 +332,8 @@ calculate_interaction_scores <- function(df, df_stats_by_l, df_stats_by_k, ungroup() # Calculate linear models and interaction scores - interaction_scores <- df_stats_interaction_all %>% - group_by(OrfRep, Gene, num) %>% + interaction_scores <- interaction_scores_all %>% + group_by(across(all_of(group_vars))) %>% summarise( lm_l = list(lm(delta_l ~ conc_num_factor)), lm_k = list(lm(delta_k ~ conc_num_factor)), @@ -370,10 +369,62 @@ calculate_interaction_scores <- function(df, df_stats_by_l, df_stats_by_k, raw_shift_auc, z_shift_auc, lm_Score_AUC, Z_lm_AUC = lm_Score_AUC, R_Squared_AUC, sum_z_score_auc, avg_zscore_auc, NG, DB, SM) - return(list(zscores_calculations = df_stats_interaction_all, zscores_interactions = interaction_scores)) + return(list(zscores_calculations = interaction_scores_all, zscores_interactions = interaction_scores)) } +generate_rf_plots <- function(df_calculations, df_interactions, output_dir, file_prefix = "RF") { + variables <- c("Delta_L", "Delta_K", "Delta_r", "Delta_AUC") + WT_sds <- list(WT_sd_l = 2, WT_sd_K = 2, WT_sd_r = 0.65, WT_sd_AUC = 6500) + + plot_list <- lapply(seq_along(variables), function(i) { + var <- variables[i] + WT_sd <- WT_sds[[i]] + + ggplot(df_calculations, aes(conc_num_factor, !!sym(var))) + + geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + + coord_cartesian(ylim = c(-WT_sd, WT_sd)) + + geom_errorbar(aes(ymin = 0 - (2 * WT_sd), ymax = 0 + (2 * WT_sd)), alpha = 0.3) + + ggtitle(paste(df_calculations$OrfRep[1], df_calculations$Gene[1], sep = " ")) + + annotate("text", x = 1, y = 0.9 * WT_sd, label = paste("ZShift =", round(df_interactions[[paste0("Z_Shift_", var)]], 2))) + + annotate("text", x = 1, y = 0.7 * WT_sd, label = paste("lm Zscore =", round(df_interactions[[paste0("Z_lm_", var)]], 2))) + + annotate("text", x = 1, y = -0.7 * WT_sd, label = paste("NG =", df_interactions$NG)) + + annotate("text", x = 1, y = -0.9 * WT_sd, label = paste("DB =", df_interactions$DB)) + + annotate("text", x = 1, y = -1.1 * WT_sd, label = paste("SM =", df_interactions$SM)) + + scale_x_continuous( + name = unique(df_calculations$Drug[1]), + breaks = unique(df_calculations$conc_num_factor), + labels = unique(as.character(df_calculations$conc_num))) + + theme_publication() + }) + save_plots(file_prefix, plot_list, output_dir) +} + +generate_summary_plots <- function(df, output_dir) { + variables <- c("L", "K", "r", "AUC") + plot_list <- lapply(variables, function(var) { + generate_plot(df, x_var = conc_num_factor, y_var = !!sym(var), plot_type = "scatter", title = paste("Summary Plot for", var)) + }) + + save_plots("Summary_Plots", plot_list, output_dir) +} + +generate_cpp_correlation_plots <- function(df_na_rm, lm_list, output_dir) { + lm_summaries <- lapply(lm_list, summary) + plot_titles <- c("Interaction L vs. Interaction K", "Interaction L vs. Interaction r", "Interaction L vs. Interaction AUC", + "Interaction K vs. Interaction r", "Interaction K vs. Interaction AUC", "Interaction r vs. Interaction AUC") + + plot_list <- lapply(seq_along(lm_list), function(i) { + ggplot(df_na_rm, aes_string(x = names(lm_list)[i][1], y = names(lm_list)[i][2])) + + geom_point(shape = 3, color = "gray70") + + geom_smooth(method = "lm", color = "tomato3") + + ggtitle(plot_titles[i]) + + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[i]]$r.squared, 3))) + + theme_Publication_legend_right() + }) + + save_plots("Correlation_CPPs", plot_list, output_dir) +} main <- function() { # Applying to all experiments @@ -390,9 +441,6 @@ main <- function() { df <- load_and_process_data(args$easy_results_file, exp_sd) df <- update_gene_names(df, args$sgd_gene_list) - # Variables for analysis - variables <- c("L", "K", "r", "AUC", "delta_bg") - # QC # Filter the df above sd tolerance df_above_tolerance <- df %>% filter(DB == 1) @@ -407,13 +455,15 @@ main <- function() { ) df_no_zeros <- df_na %>% filter(L > 0) - # Generate PDFs and HTMLs + # Variables for QC analysis + variables <- c("L", "K", "r", "AUC", "delta_bg") + + # Generate QC PDFs and HTMLs generate_and_save_plots(df, out_dir_qc, "Before_QC", variables, include_qc = TRUE) generate_and_save_plots(df_above_tolerance, out_dir_qc, "Raw_L_vs_K_above_delta_bg_threshold", variables, include_qc = TRUE) generate_and_save_plots(df_na, out_dir_qc, "After_QC", variables) generate_and_save_plots(df_no_zeros, out_dir_qc, "No_Zeros", variables) - # TODO We may be more cpu than memory contrained at this point, not sure rm(df_no_zeros, df_above_tolerance) # Summary statistics @@ -422,11 +472,10 @@ main <- function() { write.csv(summary_stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE) # Calculate the pre-background stats once - stats <- calculate_summary_stats(df_na, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor")) - stats_by_l <- df_stats %>% select(starts_with("L_"), all_of(group_vars)) - stats_by_k <- df_stats %>% select(starts_with("K_"), all_of(group_vars)) - stats_by_r <- df_stats %>% select(starts_with("r_"), all_of(group_vars)) - stats_by_auc <- df_stats %>% select(starts_with("AUC_"), all_of(group_vars)) + stats_by_l <- summary_stats %>% select(starts_with("L_"), "OrfRep", "conc_num", "conc_num_factor") + stats_by_k <- summary_stats %>% select(starts_with("K_"), "OrfRep", "conc_num", "conc_num_factor") + stats_by_r <- summary_stats %>% select(starts_with("r_"), "OrfRep", "conc_num", "conc_num_factor") + stats_by_auc <- summary_stats %>% select(starts_with("AUC_"), "OrfRep", "conc_num", "conc_num_factor") # Process background strains background_strains <- c("YDL227C") @@ -445,11 +494,11 @@ main <- function() { filter(!is.na(L)) # Recalculate summary statistics for the background strain - stats_background <- calculate_summary_stats(df_background, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor")) - stats_by_l_background <- df_stats_background %>% select(starts_with("L_"), all_of(group_vars)) - stats_by_k_background <- df_stats_background %>% select(starts_with("K_"), all_of(group_vars)) - stats_by_r_background <- df_stats_background %>% select(starts_with("r_"), all_of(group_vars)) - stats_by_auc_background <- df_stats_background %>% select(starts_with("AUC_"), all_of(group_vars)) + stats_background <- calculate_summary_stats(df_background, variables, group_vars = c("OrfRep", "Gene", "conc_num", "conc_num_factor")) + stats_by_l_background <- df_stats_background %>% select(starts_with("L_"), "OrfRep", "conc_num", "conc_num_factor") + stats_by_k_background <- df_stats_background %>% select(starts_with("K_"), "OrfRep", "conc_num", "conc_num_factor") + stats_by_r_background <- df_stats_background %>% select(starts_with("r_"), "OrfRep", "conc_num", "conc_num_factor") + stats_by_auc_background <- df_stats_background %>% select(starts_with("AUC_"), "OrfRep", "conc_num", "conc_num_factor") # Backup in case previous block explodes # Combine all summary statistics into one dataframe @@ -469,12 +518,12 @@ main <- function() { outside_2sd_k <- results_2sd$outside_2sd_k # Calculate summary statistics for L within and outside 2SD of K - l_within_2sd_k <- calculate_summary_stats(within_2sd_k, "L") + l_within_2sd_k <- calculate_summary_stats(within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor")) write.csv(l_within_2sd_k, file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"), row.names = FALSE) - l_outside_2sd_k <- calculate_summary_stats(outside_2sd_k, "L") + l_outside_2sd_k <- calculate_summary_stats(outside_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor")) write.csv(l_outside_2sd_k, file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_outside_2sd_k.csv"), row.names = FALSE) @@ -484,10 +533,12 @@ main <- function() { background_means <- calculate_background_means(stats_by_l_background, stats_by_k_background, stats_by_r_background, stats_by_auc_background) + # Formerly X2_RF df_reference <- df_na %>% filter(OrfRep == strain) %>% mutate(SM = 0) + # Formerly X2 df_deletion <- df_na %>% filter(OrfRep != strain) %>% mutate(SM = 0) @@ -495,32 +546,35 @@ main <- function() { df_reference_strains <- process_strains(df_reference, stats_by_l_within_2sd_k, strain, out_dir) df_deletion_strains <- process_strains(df_deletion, stats_by_l_within_2sd_k, strain, out_dir) - # TODO: we are mutating OrfRep to make it more unique, is this a sensible approach? - # Should probably create a new column? + variables <- c("L", "K", "r", "AUC") + + # Deprecated # Change OrfRep to include the reference strain, gene, and Num so each RF gets its own score # df_reference_strains <- df_reference_strains %>% # mutate(OrfRep = paste(OrfRep, Gene, num, sep = "_")) - # num_genes <- length(unique(df_reference_strains$OrfRep)) - # print(num_genes) - - variables <- c("L", "K", "r", "AUC") - reference_results <- calculate_interaction_scores(df_reference, stats_by_l, stats_by_k, - stats_by_r, stats_by_auc, background_means, max_conc, variables, out_dir) + # This is synonymous with the legacy OrfRep mutation + # default_group_vars <- c("OrfRep", "Gene", "num") - # Process deletion strains - deletion_results <- calculate_interaction_scores(df_deletion, stats_by_l, stats_by_k, - stats_by_r, stats_by_auc, background_means, max_conc, variables, out_dir) + # Use group_by in functions in lieu of mutating OrfRep + # We are leaving OrfRep unchanged and using group_by(OrfRep, Gene, num) by default + reference_results <- calculate_interaction_scores(df_reference, stats_by_l, stats_by_k, stats_by_r, stats_by_auc, + background_means, max_conc, variables) + + deletion_results <- calculate_interaction_scores(df_deletion, stats_by_l, stats_by_k, stats_by_r, stats_by_auc, + background_means, max_conc, variables) zscores_calculations_reference <- reference_results$zscores_calculations zscores_interactions_reference <- reference_results$zscores_interactions zscores_calculations <- deletion_results$zscores_calculations zscores_interactions <- deletion_results$zscores_interactions + + # TODO: I don't know if we need this? zscores_interactions <- zscores_interactions %>% arrange(desc(Z_lm_L)) %>% arrange(desc(NG)) - + write.csv(zscores_calculations_reference, file = file.path(out_dir, "RF_ZScores_Calculations.csv"), row.names = FALSE) write.csv(zscores_calculations, file = file.path(out_dir, "ZScores_Calculations.csv"), row.names = FALSE) write.csv(zscores_interactions_reference, file = file.path(out_dir, "RF_ZScores_Interaction.csv"), row.names = FALSE) @@ -571,6 +625,24 @@ main <- function() { file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_K_lm.csv"), row.names = FALSE) # Generate plots for interaction scores + generate_rf_plots(zscores_calculations_reference, zscores_interactions_reference, out_dir) + generate_rf_plots(zscores_calculations, zscores_interactions, out_dir) + + # Apply filtering to remove NA values before further analysis + zscores_interactions_filtered <- zscores_interactions %>% + filter(!is.na(Z_lm_L) | !is.na(Avg_Zscore_L)) + + # Generate summary and correlation plots + generate_summary_plots(df, out_dir) + lm_list <- list( + lm(Z_lm_K ~ Z_lm_L, data = zscores_interactions_filtered), + lm(Z_lm_r ~ Z_lm_L, data = zscores_interactions_filtered), + lm(Z_lm_AUC ~ Z_lm_L, data = zscores_interactions_filtered), + lm(Z_lm_r ~ Z_lm_K, data = zscores_interactions_filtered), + lm(Z_lm_AUC ~ Z_lm_K, data = zscores_interactions_filtered), + lm(Z_lm_AUC ~ Z_lm_r, data = zscores_interactions_filtered) + ) + generate_cpp_correlation_plots(zscores_interactions_filtered, lm_list, out_dir) }) }) } @@ -578,891 +650,703 @@ main() -for (s in background_strains) { - # Initialize empty plots (placeholder for future plotting) - # p_l <- ggplot() - # p_k <- ggplot() - # p_r <- ggplot() - # p_auc <- ggplot() - # p_rf_l <- ggplot() - # p_rf_k <- ggplot() - # p_rf_r <- ggplot() - # p_rf_auc <- ggplot() - # Generate ggplot objects for each RF strain - for (i in seq_len(num_genes_RF)) { - gene_sel <- unique(interaction_scores_RF$OrfRep)[i] - df_z_calculations <- df_stats_interaction_all_RF %>% filter(OrfRep == gene_sel) - df_int_scores <- interaction_scores_RF %>% filter(OrfRep == gene_sel) + + + + + + + + + + + + + +# NEEDS REFACTORING +# for (s in background_strains) { + +# Initialize empty plots (placeholder for future plotting) +# p_l <- ggplot() +# p_k <- ggplot() +# p_r <- ggplot() +# p_auc <- ggplot() +# p_rf_l <- ggplot() +# p_rf_k <- ggplot() +# p_rf_r <- ggplot() +# p_rf_auc <- ggplot() + +# # Generate ggplot objects for each RF strain +# for (i in seq_len(num_genes_reference)) { +# gene_sel <- unique(interaction_scores_reference$OrfRep)[i] +# df_z_calculations <- df_stats_interaction_all_RF %>% filter(OrfRep == gene_sel) +# df_int_scores <- interaction_scores_RF %>% filter(OrfRep == gene_sel) - p_rf_l[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_L)) + - geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + - coord_cartesian(ylim = c(-65, 65)) + - geom_errorbar(aes(ymin = 0 - (2 * WT_sd_l), ymax = 0 + (2 * WT_sd_l)), alpha = 0.3) + - ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) + - annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$Z_Shift_L, 2))) + - annotate("text", x = 1, y = 25, label = paste("lm Zscore =", round(df_int_scores$Z_lm_L, 2))) + - annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) + - annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) + - annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) + - scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor), labels = unique(as.character(df_z_calculations$conc_num))) + - scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) + - theme_publication() +# p_rf_l[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_L)) + +# geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + +# coord_cartesian(ylim = c(-65, 65)) + +# geom_errorbar(aes(ymin = 0 - (2 * WT_sd_l), ymax = 0 + (2 * WT_sd_l)), alpha = 0.3) + +# ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) + +# annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$Z_Shift_L, 2))) + +# annotate("text", x = 1, y = 25, label = paste("lm Zscore =", round(df_int_scores$Z_lm_L, 2))) + +# annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) + +# annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) + +# annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) + +# scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor), +# labels = unique(as.character(df_z_calculations$conc_num))) + +# scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) + +# theme_publication() - p_rf_k[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_K)) + - geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + - coord_cartesian(ylim = c(-65, 65)) + - geom_errorbar(aes(ymin = 0 - (2 * WT_sd_K), ymax = 0 + (2 * WT_sd_K)), alpha = 0.3) + - ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) + - annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$Z_Shift_K, 2))) + - annotate("text", x = 1, y = 25, label = paste("lm Zscore =", round(df_int_scores$Z_lm_K, 2))) + - annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) + - annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) + - annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) + - scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor), labels = unique(as.character(df_z_calculations$conc_num))) + - scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) + - theme_publication() +# p_rf_k[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_K)) + +# geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + +# coord_cartesian(ylim = c(-65, 65)) + +# geom_errorbar(aes(ymin = 0 - (2 * WT_sd_K), ymax = 0 + (2 * WT_sd_K)), alpha = 0.3) + +# ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) + +# annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$Z_Shift_K, 2))) + +# annotate("text", x = 1, y = 25, label = paste("lm Zscore =", round(df_int_scores$Z_lm_K, 2))) + +# annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) + +# annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) + +# annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) + +# scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor), +# labels = unique(as.character(df_z_calculations$conc_num))) + +# scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) + +# theme_publication() - p_rf_r[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_r)) + - geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + - coord_cartesian(ylim = c(-0.65, 0.65)) + - geom_errorbar(aes(ymin = 0 - (2 * WT_sd_r), ymax = 0 + (2 * WT_sd_r)), alpha = 0.3) + - ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) + - annotate("text", x = 1, y = 0.45, label = paste("ZShift =", round(df_int_scores$Z_Shift_r, 2))) + - annotate("text", x = 1, y = 0.25, label = paste("lm Zscore =", round(df_int_scores$Z_lm_r, 2))) + - annotate("text", x = 1, y = -0.25, label = paste("NG =", df_int_scores$NG)) + - annotate("text", x = 1, y = -0.35, label = paste("DB =", df_int_scores$DB)) + - annotate("text", x = 1, y = -0.45, label = paste("SM =", df_int_scores$SM)) + - scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor), labels = unique(as.character(df_z_calculations$conc_num))) + - scale_y_continuous(breaks = c(-0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6)) + - theme_publication() +# p_rf_r[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_r)) + +# geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + +# coord_cartesian(ylim = c(-0.65, 0.65)) + +# geom_errorbar(aes(ymin = 0 - (2 * WT_sd_r), ymax = 0 + (2 * WT_sd_r)), alpha = 0.3) + +# ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) + +# annotate("text", x = 1, y = 0.45, label = paste("ZShift =", round(df_int_scores$Z_Shift_r, 2))) + +# annotate("text", x = 1, y = 0.25, label = paste("lm Zscore =", round(df_int_scores$Z_lm_r, 2))) + +# annotate("text", x = 1, y = -0.25, label = paste("NG =", df_int_scores$NG)) + +# annotate("text", x = 1, y = -0.35, label = paste("DB =", df_int_scores$DB)) + +# annotate("text", x = 1, y = -0.45, label = paste("SM =", df_int_scores$SM)) + +# scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor), +# labels = unique(as.character(df_z_calculations$conc_num))) + +# scale_y_continuous(breaks = c(-0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6)) + +# theme_publication() - p_rf_auc[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_AUC)) + - geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + - coord_cartesian(ylim = c(-6500, 6500)) + - geom_errorbar(aes(ymin = 0 - (2 * WT_sd_AUC), ymax = 0 + (2 * WT_sd_AUC)), alpha = 0.3) + - ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) + - annotate("text", x = 1, y = 4500, label = paste("ZShift =", round(df_int_scores$Z_Shift_AUC, 2))) + - annotate("text", x = 1, y = 2500, label = paste("lm Zscore =", round(df_int_scores$Z_lm_AUC, 2))) + - annotate("text", x = 1, y = -2500, label = paste("NG =", df_int_scores$NG)) + - annotate("text", x = 1, y = -3500, label = paste("DB =", df_int_scores$DB)) + - annotate("text", x = 1, y = -4500, label = paste("SM =", df_int_scores$SM)) + - scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor), labels = unique(as.character(df_z_calculations$conc_num))) + - scale_y_continuous(breaks = c(-6000, -5000, -4000, -3000, -2000, -1000, 0, 1000, 2000, 3000, 4000, 5000, 6000)) + - theme_publication() +# p_rf_auc[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_AUC)) + +# geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + +# coord_cartesian(ylim = c(-6500, 6500)) + +# geom_errorbar(aes(ymin = 0 - (2 * WT_sd_AUC), ymax = 0 + (2 * WT_sd_AUC)), alpha = 0.3) + +# ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) + +# annotate("text", x = 1, y = 4500, label = paste("ZShift =", round(df_int_scores$Z_Shift_AUC, 2))) + +# annotate("text", x = 1, y = 2500, label = paste("lm Zscore =", round(df_int_scores$Z_lm_AUC, 2))) + +# annotate("text", x = 1, y = -2500, label = paste("NG =", df_int_scores$NG)) + +# annotate("text", x = 1, y = -3500, label = paste("DB =", df_int_scores$DB)) + +# annotate("text", x = 1, y = -4500, label = paste("SM =", df_int_scores$SM)) + +# scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor), +# labels = unique(as.character(df_z_calculations$conc_num))) + +# scale_y_continuous(breaks = c(-6000, -5000, -4000, -3000, -2000, -1000, 0, 1000, 2000, 3000, 4000, 5000, 6000)) + +# theme_publication() - # # Append the final interaction statistics for all RFs - # if (i == 1) { - # df_stats_interaction_all_RF_final <- df_z_calculations - # } else { - # df_stats_interaction_all_RF_final <- bind_rows(df_stats_interaction_all_RF_final, df_z_calculations) - # } - } - - # print("Pass RF ggplot loop") - - # # Save the final interaction statistics - # write.csv(df_stats_interaction_all_RF_final, file = file.path(output_dir, "RF_ZScore_Calculations.csv"), row.names = FALSE) - - ###### Part 5 - Get Z-scores for Gene Deletion Strains - - # Get the total number of genes for the loop - # num_genes <- length(unique(df_deletion$OrfRep)) - # print(num_genes) - - # # Create the output dataframe containing columns for each deletion strain - # interaction_scores_deletion <- unique(df_deletion["OrfRep"]) - # interaction_scores_deletion <- interaction_scores_deletion %>% - # mutate( - # Gene = NA, - # Raw_Shift_L = NA, Z_Shift_L = NA, lm_Score_L = NA, Z_lm_L = NA, R_Squared_L = NA, - # Sum_Z_Score_L = NA, Avg_Zscore_L = NA, - # Raw_Shift_K = NA, Z_Shift_K = NA, lm_Score_K = NA, Z_lm_K = NA, R_Squared_K = NA, - # Sum_Z_Score_K = NA, Avg_Zscore_K = NA, - # Raw_Shift_r = NA, Z_Shift_r = NA, lm_Score_r = NA, Z_lm_r = NA, R_Squared_r = NA, - # Sum_Z_Score_r = NA, Avg_Zscore_r = NA, - # Raw_Shift_AUC = NA, Z_Shift_AUC = NA, lm_Score_AUC = NA, Z_lm_AUC = NA, R_Squared_AUC = NA, - # Sum_Z_Score_AUC = NA, Avg_Zscore_AUC = NA, - # NG = NA, DB = NA, SM = NA - # ) - - # for (i in seq_len(num_genes)) { - # gene_sel <- unique(df_deletion$OrfRep)[i] - # df_gene_sel <- df_deletion %>% filter(OrfRep == gene_sel) +# # Loop through each gene to generate plots +# for (i in 1:num_genes) { +# gene_sel <- unique(interaction_scores_deletion$OrfRep)[i] +# df_z_calculations <- df_stats_interaction_all %>% filter(OrfRep == gene_sel) +# df_int_scores <- interaction_scores_deletion %>% filter(OrfRep == gene_sel) - # df_stats_interaction <- df_gene_sel %>% - # group_by(OrfRep, Gene, conc_num, conc_num_factor) %>% - # summarise( - # N = length(L), mean_L = mean(L, na.rm = TRUE), median_L = median(L, na.rm = TRUE), - # sd_L = sd(L, na.rm = TRUE), se_L = sd_L / sqrt(N - 1), - # mean_K = mean(K, na.rm = TRUE), median_K = median(K, na.rm = TRUE), - # sd_K = sd(K, na.rm = TRUE), se_K = sd_K / sqrt(N - 1), - # mean_r = mean(r, na.rm = TRUE), median_r = median(r, na.rm = TRUE), - # sd_r = sd(r, na.rm = TRUE), se_r = sd_r / sqrt(N - 1), - # mean_AUC = mean(AUC, na.rm = TRUE), median_AUC = median(AUC, na.rm = TRUE), - # sd_AUC = sd(AUC, na.rm = TRUE), se_AUC = sd_AUC / sqrt(N - 1), - # NG = sum(NG, na.rm = TRUE), DB = sum(DB, na.rm = TRUE), SM = sum(SM, na.rm = TRUE) - # ) %>% - # ungroup() - - # if (is.na(df_stats_interaction$mean_L[1]) || df_stats_interaction$mean_L[1] == 0) { - # df_stats_interaction <- df_stats_interaction %>% - # mutate( - # Raw_Shift_L = 0, Raw_Shift_K = 0, Raw_Shift_r = 0, Raw_Shift_AUC = 0, - # Z_Shift_L = 0, Z_Shift_K = 0, Z_Shift_r = 0, Z_Shift_AUC = 0 - # ) - # } else { - # df_stats_interaction <- df_stats_interaction %>% - # mutate( - # Raw_Shift_L = mean_L[1] - background_L, Raw_Shift_K = mean_K[1] - background_K, - # Raw_Shift_r = mean_r[1] - background_r, Raw_Shift_AUC = mean_AUC[1] - background_AUC, - # Z_Shift_L = Raw_Shift_L[1] / df_stats_BY_L$sd[1], - # Z_Shift_K = Raw_Shift_K[1] / df_stats_BY_K$sd[1], - # Z_Shift_r = Raw_Shift_r[1] / df_stats_BY_r$sd[1], - # Z_Shift_AUC = Raw_Shift_AUC[1] / df_stats_BY_AUC$sd[1] - # ) - # } - - # df_stats_interaction <- df_stats_interaction %>% - # mutate( - # WT_l = df_stats_BY_L$mean, WT_K = df_stats_BY_K$mean, - # WT_r = df_stats_BY_r$mean, WT_AUC = df_stats_BY_AUC$mean, - # WT_sd_l = df_stats_BY_L$sd, WT_sd_K = df_stats_BY_K$sd, - # WT_sd_r = df_stats_BY_r$sd, WT_sd_AUC = df_stats_BY_AUC$sd - # ) - - # if (df_stats_interaction$mean_L[1] != 0 && !is.na(df_stats_interaction$mean_L[1])) { - # df_stats_interaction <- df_stats_interaction %>% - # mutate( - # Exp_L = WT_l + Raw_Shift_L, Exp_K = WT_K + Raw_Shift_K, - # Exp_r = WT_r + Raw_Shift_r, Exp_AUC = WT_AUC + Raw_Shift_AUC, - # Delta_L = mean_L - Exp_L, Delta_K = mean_K - Exp_K, - # Delta_r = mean_r - Exp_r, Delta_AUC = mean_AUC - Exp_AUC - # ) - - # if (sum(df_stats_interaction$NG, na.rm = TRUE) > 0) { - # df_stats_interaction <- df_stats_interaction %>% - # mutate( - # Delta_L = if_else(NG == 1, mean_L - WT_l, Delta_L), - # Delta_K = if_else(NG == 1, mean_K - WT_K, Delta_K), - # Delta_r = if_else(NG == 1, mean_r - WT_r, Delta_r), - # Delta_AUC = if_else(NG == 1, mean_AUC - WT_AUC, Delta_AUC) - # ) - # } - - # if (sum(df_stats_interaction$SM, na.rm = TRUE) > 0) { - # df_stats_interaction <- df_stats_interaction %>% - # mutate(Delta_L = if_else(SM == 1, mean_L - WT_l, Delta_L)) - # } - - # df_stats_interaction <- df_stats_interaction %>% - # mutate( - # Zscore_L = Delta_L / WT_sd_l, Zscore_K = Delta_K / WT_sd_K, - # Zscore_r = Delta_r / WT_sd_r, Zscore_AUC = Delta_AUC / WT_sd_AUC - # ) - - # gene_lm_L <- lm(Delta_L ~ conc_num_factor, data = df_stats_interaction) - # gene_lm_K <- lm(Delta_K ~ conc_num_factor, data = df_stats_interaction) - # gene_lm_r <- lm(Delta_r ~ conc_num_factor, data = df_stats_interaction) - # gene_lm_AUC <- lm(Delta_AUC ~ conc_num_factor, data = df_stats_interaction) - - # gene_interaction_L <- max_conc * coef(gene_lm_L)[2] + coef(gene_lm_L)[1] - # gene_interaction_K <- max_conc * coef(gene_lm_K)[2] + coef(gene_lm_K)[1] - # gene_interaction_r <- max_conc * coef(gene_lm_r)[2] + coef(gene_lm_r)[1] - # gene_interaction_AUC <- max_conc * coef(gene_lm_AUC)[2] + coef(gene_lm_AUC)[1] - - # r_squared_l <- summary(gene_lm_L)$r.squared - # r_squared_K <- summary(gene_lm_K)$r.squared - # r_squared_r <- summary(gene_lm_r)$r.squared - # r_squared_AUC <- summary(gene_lm_AUC)$r.squared - - # num_non_removed_conc <- total_conc_nums - sum(df_stats_interaction$DB, na.rm = TRUE) - 1 - - # interaction_scores_deletion <- interaction_scores_deletion %>% - # mutate( - # Gene = replace(Gene, OrfRep == gene_sel, df_gene_sel$Gene[1]), - # Raw_Shift_L = replace(Raw_Shift_L, OrfRep == gene_sel, df_stats_interaction$Raw_Shift_L[1]), - # Z_Shift_L = replace(Z_Shift_L, OrfRep == gene_sel, df_stats_interaction$Z_Shift_L[1]), - # lm_Score_L = replace(lm_Score_L, OrfRep == gene_sel, gene_interaction_L), - # Z_lm_L = replace(Z_lm_L, OrfRep == gene_sel, (gene_interaction_L - lm_mean_L) / lm_sd_L), - # R_Squared_L = replace(R_Squared_L, OrfRep == gene_sel, r_squared_l), - # Sum_Z_Score_L = replace(Sum_Z_Score_L, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_L, na.rm = TRUE)), - # Avg_Zscore_L = replace(Avg_Zscore_L, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_L, na.rm = TRUE) / num_non_removed_conc), - # Raw_Shift_K = replace(Raw_Shift_K, OrfRep == gene_sel, df_stats_interaction$Raw_Shift_K[1]), - # Z_Shift_K = replace(Z_Shift_K, OrfRep == gene_sel, df_stats_interaction$Z_Shift_K[1]), - # lm_Score_K = replace(lm_Score_K, OrfRep == gene_sel, gene_interaction_K), - # Z_lm_K = replace(Z_lm_K, OrfRep == gene_sel, (gene_interaction_K - lm_mean_K) / lm_sd_K), - # R_Squared_K = replace(R_Squared_K, OrfRep == gene_sel, r_squared_K), - # Sum_Z_Score_K = replace(Sum_Z_Score_K, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_K, na.rm = TRUE)), - # Avg_Zscore_K = replace(Avg_Zscore_K, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_K, na.rm = TRUE) / num_non_removed_conc), - # Raw_Shift_r = replace(Raw_Shift_r, OrfRep == gene_sel, df_stats_interaction$Raw_Shift_r[1]), - # Z_Shift_r = replace(Z_Shift_r, OrfRep == gene_sel, df_stats_interaction$Z_Shift_r[1]), - # lm_Score_r = replace(lm_Score_r, OrfRep == gene_sel, gene_interaction_r), - # Z_lm_r = replace(Z_lm_r, OrfRep == gene_sel, (gene_interaction_r - lm_mean_r) / lm_sd_r), - # R_Squared_r = replace(R_Squared_r, OrfRep == gene_sel, r_squared_r), - # Sum_Z_Score_r = replace(Sum_Z_Score_r, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_r, na.rm = TRUE)), - # Avg_Zscore_r = replace(Avg_Zscore_r, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_r, na.rm = TRUE) / (total_conc_nums - 1)), - # Raw_Shift_AUC = replace(Raw_Shift_AUC, OrfRep == gene_sel, df_stats_interaction$Raw_Shift_AUC[1]), - # Z_Shift_AUC = replace(Z_Shift_AUC, OrfRep == gene_sel, df_stats_interaction$Z_Shift_AUC[1]), - # lm_Score_AUC = replace(lm_Score_AUC, OrfRep == gene_sel, gene_interaction_AUC), - # Z_lm_AUC = replace(Z_lm_AUC, OrfRep == gene_sel, (gene_interaction_AUC - lm_mean_AUC) / lm_sd_AUC), - # R_Squared_AUC = replace(R_Squared_AUC, OrfRep == gene_sel, r_squared_AUC), - # Sum_Z_Score_AUC = replace(Sum_Z_Score_AUC, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_AUC, na.rm = TRUE)), - # Avg_Zscore_AUC = replace(Avg_Zscore_AUC, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_AUC, na.rm = TRUE) / (total_conc_nums - 1)) - # ) - # } else { - # # Similar logic for when mean_L is 0 or NA, setting relevant variables to NA or appropriate values - # interaction_scores_deletion <- interaction_scores_deletion %>% - # mutate( - # Gene = replace(Gene, OrfRep == gene_sel, df_gene_sel$Gene[1]), - # Raw_Shift_L = replace(Raw_Shift_L, OrfRep == gene_sel, NA), - # Z_Shift_L = replace(Z_Shift_L, OrfRep == gene_sel, NA), - # lm_Score_L = replace(lm_Score_L, OrfRep == gene_sel, NA), - # Z_lm_L = replace(Z_lm_L, OrfRep == gene_sel, NA), - # R_Squared_L = replace(R_Squared_L, OrfRep == gene_sel, NA), - # Sum_Z_Score_L = replace(Sum_Z_Score_L, OrfRep == gene_sel, NA), - # Avg_Zscore_L = replace(Avg_Zscore_L, OrfRep == gene_sel, NA), - # Raw_Shift_K = replace(Raw_Shift_K, OrfRep == gene_sel, NA), - # Z_Shift_K = replace(Z_Shift_K, OrfRep == gene_sel, NA), - # lm_Score_K = replace(lm_Score_K, OrfRep == gene_sel, NA), - # Z_lm_K = replace(Z_lm_K, OrfRep == gene_sel, NA), - # R_Squared_K = replace(R_Squared_K, OrfRep == gene_sel, NA), - # Sum_Z_Score_K = replace(Sum_Z_Score_K, OrfRep == gene_sel, NA), - # Avg_Zscore_K = replace(Avg_Zscore_K, OrfRep == gene_sel, NA), - # Raw_Shift_r = replace(Raw_Shift_r, OrfRep == gene_sel, NA), - # Z_Shift_r = replace(Z_Shift_r, OrfRep == gene_sel, NA), - # lm_Score_r = replace(lm_Score_r, OrfRep == gene_sel, NA), - # Z_lm_r = replace(Z_lm_r, OrfRep == gene_sel, NA), - # R_Squared_r = replace(R_Squared_r, OrfRep == gene_sel, NA), - # Sum_Z_Score_r = replace(Sum_Z_Score_r, OrfRep == gene_sel, NA), - # Avg_Zscore_r = replace(Avg_Zscore_r, OrfRep == gene_sel, NA), - # Raw_Shift_AUC = replace(Raw_Shift_AUC, OrfRep == gene_sel, NA), - # Z_Shift_AUC = replace(Z_Shift_AUC, OrfRep == gene_sel, NA), - # lm_Score_AUC = replace(lm_Score_AUC, OrfRep == gene_sel, NA), - # Z_lm_AUC = replace(Z_lm_AUC, OrfRep == gene_sel, NA), - # R_Squared_AUC = replace(R_Squared_AUC, OrfRep == gene_sel, NA), - # Sum_Z_Score_AUC = replace(Sum_Z_Score_AUC, OrfRep == gene_sel, NA), - # Avg_Zscore_AUC = replace(Avg_Zscore_AUC, OrfRep == gene_sel, NA) - # ) - # } - - # if (i == 1) { - # df_stats_interaction_all <- df_stats_interaction - # } else { - # df_stats_interaction_all <- bind_rows(df_stats_interaction_all, df_stats_interaction) - # } - - # interaction_scores_deletion <- interaction_scores_deletion %>% - # mutate( - # NG = replace(NG, OrfRep == gene_sel, sum(df_stats_interaction$NG, na.rm = TRUE)), - # DB = replace(DB, OrfRep == gene_sel, sum(df_stats_interaction$DB, na.rm = TRUE)), - # SM = replace(SM, OrfRep == gene_sel, sum(df_stats_interaction$SM, na.rm = TRUE)) - # ) - # } - - # print("Pass Int Calculation loop") - - # # Order the interaction scores by Z_lm_L and NG - # interaction_scores_deletion <- interaction_scores_deletion %>% - # arrange(desc(Z_lm_L)) %>% - # arrange(desc(NG)) - - # Save the interaction scores and filtered sets for enhancers and suppressors - # output_files <- list( - # "ZScores_Interaction.csv" = interaction_scores_deletion, - # "ZScores_Interaction_DeletionEnhancers_L.csv" = filter(interaction_scores_deletion, Avg_Zscore_L >= 2), - # "ZScores_Interaction_DeletionEnhancers_K.csv" = filter(interaction_scores_deletion, Avg_Zscore_K <= -2), - # "ZScores_Interaction_DeletionSuppressors_L.csv" = filter(interaction_scores_deletion, Avg_Zscore_L <= -2), - # "ZScores_Interaction_DeletionSuppressors_K.csv" = filter(interaction_scores_deletion, Avg_Zscore_K >= 2), - # "ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv" = filter(interaction_scores_deletion, Avg_Zscore_L >= 2 | Avg_Zscore_L <= -2), - # "ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv" = filter(interaction_scores_deletion, Avg_Zscore_K >= 2 | Avg_Zscore_K <= -2), - # "ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv" = filter(interaction_scores_deletion, Z_lm_L >= 2 & Avg_Zscore_L <= -2), - # "ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv" = filter(interaction_scores_deletion, Z_lm_L <= -2 & Avg_Zscore_L >= 2), - # "ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv" = filter(interaction_scores_deletion, Z_lm_K <= -2 & Avg_Zscore_K >= 2), - # "ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv" = filter(interaction_scores_deletion, Z_lm_K >= 2 & Avg_Zscore_K <= -2) - # ) - - # for (file_name in names(output_files)) { - # write.csv(output_files[[file_name]], file = file.path(output_dir, file_name), row.names = FALSE) - # } - - # # Further filtering for linear regression enhancers and suppressors - # output_files_lm <- list( - # "ZScores_Interaction_DeletionEnhancers_L_lm.csv" = filter(interaction_scores_deletion, Z_lm_L >= 2), - # "ZScores_Interaction_DeletionEnhancers_K_lm.csv" = filter(interaction_scores_deletion, Z_lm_K <= -2), - # "ZScores_Interaction_DeletionSuppressors_L_lm.csv" = filter(interaction_scores_deletion, Z_lm_L <= -2), - # "ZScores_Interaction_DeletionSuppressors_K_lm.csv" = filter(interaction_scores_deletion, Z_lm_K >= 2), - # "ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv" = filter(interaction_scores_deletion, Z_lm_L >= 2 | Z_lm_L <= -2), - # "ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv" = filter(interaction_scores_deletion, Z_lm_K >= 2 | Z_lm_K <= -2) - # ) - - # for (file_name in names(output_files_lm)) { - # write.csv(output_files_lm[[file_name]], file = file.path(output_dir, file_name), row.names = FALSE) - # } - - - # Loop through each gene to generate plots - for (i in 1:num_genes) { - gene_sel <- unique(interaction_scores_deletion$OrfRep)[i] - df_z_calculations <- df_stats_interaction_all %>% filter(OrfRep == gene_sel) - df_int_scores <- interaction_scores_deletion %>% filter(OrfRep == gene_sel) +# p_l[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_L)) + +# geom_point() + +# geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + +# coord_cartesian(ylim = c(-65, 65)) + +# geom_errorbar(aes(ymin = 0 - (2 * WT_sd_l), ymax = 0 + (2 * WT_sd_l)), alpha = 0.3) + +# ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) + +# annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$Z_Shift_L, 2))) + +# annotate("text", x = 1, y = 25, label = paste("Z lm Score =", round(df_int_scores$Z_lm_L, 2))) + +# annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) + +# annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) + +# annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) + +# scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor), +# labels = unique(as.character(df_z_calculations$conc_num))) + +# scale_y_continuous(breaks = seq(-60, 60, 10)) + +# theme_Publication() - p_l[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_L)) + - geom_point() + - geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + - coord_cartesian(ylim = c(-65, 65)) + - geom_errorbar(aes(ymin = 0 - (2 * WT_sd_l), ymax = 0 + (2 * WT_sd_l)), alpha = 0.3) + - ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) + - annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$Z_Shift_L, 2))) + - annotate("text", x = 1, y = 25, label = paste("Z lm Score =", round(df_int_scores$Z_lm_L, 2))) + - annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) + - annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) + - annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) + - scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor), labels = unique(as.character(df_z_calculations$conc_num))) + - scale_y_continuous(breaks = seq(-60, 60, 10)) + - theme_Publication() +# p_k[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_K)) + +# geom_point() + +# geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + +# coord_cartesian(ylim = c(-65, 65)) + +# geom_errorbar(aes(ymin = 0 - (2 * WT_sd_K), ymax = 0 + (2 * WT_sd_K)), alpha = 0.3) + +# ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) + +# annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$Z_Shift_K, 2))) + +# annotate("text", x = 1, y = 25, label = paste("Z lm Score =", round(df_int_scores$Z_lm_K, 2))) + +# annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) + +# annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) + +# annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) + +# scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor), +# labels = unique(as.character(df_z_calculations$conc_num))) + +# scale_y_continuous(breaks = seq(-60, 60, 10)) + +# theme_Publication() - p_k[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_K)) + - geom_point() + - geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + - coord_cartesian(ylim = c(-65, 65)) + - geom_errorbar(aes(ymin = 0 - (2 * WT_sd_K), ymax = 0 + (2 * WT_sd_K)), alpha = 0.3) + - ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) + - annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$Z_Shift_K, 2))) + - annotate("text", x = 1, y = 25, label = paste("Z lm Score =", round(df_int_scores$Z_lm_K, 2))) + - annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) + - annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) + - annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) + - scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor), labels = unique(as.character(df_z_calculations$conc_num))) + - scale_y_continuous(breaks = seq(-60, 60, 10)) + - theme_Publication() +# p_r[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_r)) + +# geom_point() + +# geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + +# coord_cartesian(ylim = c(-0.65, 0.65)) + +# geom_errorbar(aes(ymin = 0 - (2 * WT_sd_r), ymax = 0 + (2 * WT_sd_r)), alpha = 0.3) + +# ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) + +# annotate("text", x = 1, y = 0.45, label = paste("ZShift =", round(df_int_scores$Z_Shift_r, 2))) + +# annotate("text", x = 1, y = 0.25, label = paste("Z lm Score =", round(df_int_scores$Z_lm_r, 2))) + +# annotate("text", x = 1, y = -0.25, label = paste("NG =", df_int_scores$NG)) + +# annotate("text", x = 1, y = -0.35, label = paste("DB =", df_int_scores$DB)) + +# annotate("text", x = 1, y = -0.45, label = paste("SM =", df_int_scores$SM)) + +# scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor), +# labels = unique(as.character(df_z_calculations$conc_num))) + +# scale_y_continuous(breaks = seq(-0.6, 0.6, 0.2)) + +# theme_Publication() - p_r[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_r)) + - geom_point() + - geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + - coord_cartesian(ylim = c(-0.65, 0.65)) + - geom_errorbar(aes(ymin = 0 - (2 * WT_sd_r), ymax = 0 + (2 * WT_sd_r)), alpha = 0.3) + - ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) + - annotate("text", x = 1, y = 0.45, label = paste("ZShift =", round(df_int_scores$Z_Shift_r, 2))) + - annotate("text", x = 1, y = 0.25, label = paste("Z lm Score =", round(df_int_scores$Z_lm_r, 2))) + - annotate("text", x = 1, y = -0.25, label = paste("NG =", df_int_scores$NG)) + - annotate("text", x = 1, y = -0.35, label = paste("DB =", df_int_scores$DB)) + - annotate("text", x = 1, y = -0.45, label = paste("SM =", df_int_scores$SM)) + - scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor), labels = unique(as.character(df_z_calculations$conc_num))) + - scale_y_continuous(breaks = seq(-0.6, 0.6, 0.2)) + - theme_Publication() +# p_auc[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_AUC)) + +# geom_point() + +# geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + +# coord_cartesian(ylim = c(-6500, 6500)) + +# geom_errorbar(aes(ymin = 0 - (2 * WT_sd_AUC), ymax = 0 + (2 * WT_sd_AUC)), alpha = 0.3) + +# ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) + +# annotate("text", x = 1, y = 4500, label = paste("ZShift =", round(df_int_scores$Z_Shift_AUC, 2))) + +# annotate("text", x = 1, y = 2500, label = paste("Z lm Score =", round(df_int_scores$Z_lm_AUC, 2))) + +# annotate("text", x = 1, y = -2500, label = paste("NG =", df_int_scores$NG)) + +# annotate("text", x = 1, y = -3500, label = paste("DB =", df_int_scores$DB)) + +# annotate("text", x = 1, y = -4500, label = paste("SM =", df_int_scores$SM)) + +# scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor), +# labels = unique(as.character(df_z_calculations$conc_num))) + +# scale_y_continuous(breaks = seq(-6000, 6000, 1000)) + +# theme_Publication() - p_auc[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_AUC)) + - geom_point() + - geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + - coord_cartesian(ylim = c(-6500, 6500)) + - geom_errorbar(aes(ymin = 0 - (2 * WT_sd_AUC), ymax = 0 + (2 * WT_sd_AUC)), alpha = 0.3) + - ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) + - annotate("text", x = 1, y = 4500, label = paste("ZShift =", round(df_int_scores$Z_Shift_AUC, 2))) + - annotate("text", x = 1, y = 2500, label = paste("Z lm Score =", round(df_int_scores$Z_lm_AUC, 2))) + - annotate("text", x = 1, y = -2500, label = paste("NG =", df_int_scores$NG)) + - annotate("text", x = 1, y = -3500, label = paste("DB =", df_int_scores$DB)) + - annotate("text", x = 1, y = -4500, label = paste("SM =", df_int_scores$SM)) + - scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor), labels = unique(as.character(df_z_calculations$conc_num))) + - scale_y_continuous(breaks = seq(-6000, 6000, 1000)) + - theme_Publication() - - if (i == 1) { - df_stats_interaction_all_final <- df_z_calculations - } else { - df_stats_interaction_all_final <- bind_rows(df_stats_interaction_all_final, df_z_calculations) - } - } +# if (i == 1) { +# df_stats_interaction_all_final <- df_z_calculations +# } else { +# df_stats_interaction_all_final <- bind_rows(df_stats_interaction_all_final, df_z_calculations) +# } +# } - print("Pass Int ggplot loop") - write.csv(df_stats_interaction_all_final, file = file.path(output_dir, "ZScore_Calculations.csv"), row.names = FALSE) +# print("Pass Int ggplot loop") +# write.csv(df_stats_interaction_all_final, file = file.path(output_dir, "ZScore_Calculations.csv"), row.names = FALSE) - # Generate a blank plot for alignment purposes - blank_plot <- ggplot(df2_rf) + geom_blank() +# # Generate a blank plot for alignment purposes +# blank_plot <- ggplot(df2_rf) + geom_blank() - # Create PDF for interaction plots - pdf(file.path(output_dir, "InteractionPlots.pdf"), width = 16, height = 16, onefile = TRUE) +# # Create PDF for interaction plots +# pdf(file.path(output_dir, "InteractionPlots.pdf"), width = 16, height = 16, onefile = TRUE) - # Summarize stats for X2_RF - df_stats_rf <- df2_rf %>% - group_by(conc_num, conc_num_factor) %>% - summarise( - mean_L = mean(L, na.rm = TRUE), - median_L = median(L, na.rm = TRUE), - max_L = max(L, na.rm = TRUE), - min_L = min(L, na.rm = TRUE), - sd_L = sd(L, na.rm = TRUE), - mean_K = mean(K, na.rm = TRUE), - median_K = median(K, na.rm = TRUE), - max_K = max(K, na.rm = TRUE), - min_K = min(K, na.rm = TRUE), - sd_K = sd(K, na.rm = TRUE), - mean_r = mean(r, na.rm = TRUE), - median_r = median(r, na.rm = TRUE), - max_r = max(r, na.rm = TRUE), - min_r = min(r, na.rm = TRUE), - sd_r = sd(r, na.rm = TRUE), - mean_AUC = mean(AUC, na.rm = TRUE), - median_AUC = median(AUC, na.rm = TRUE), - max_AUC = max(AUC, na.rm = TRUE), - min_AUC = min(AUC, na.rm = TRUE), - sd_AUC = sd(AUC, na.rm = TRUE), - NG = sum(NG, na.rm = TRUE), - DB = sum(DB, na.rm = TRUE), - SM = sum(SM, na.rm = TRUE) - ) +# # Summarize stats for X2_RF +# df_stats_rf <- df2_rf %>% +# group_by(conc_num, conc_num_factor) %>% +# summarise( +# mean_L = mean(L, na.rm = TRUE), +# median_L = median(L, na.rm = TRUE), +# max_L = max(L, na.rm = TRUE), +# min_L = min(L, na.rm = TRUE), +# sd_L = sd(L, na.rm = TRUE), +# mean_K = mean(K, na.rm = TRUE), +# median_K = median(K, na.rm = TRUE), +# max_K = max(K, na.rm = TRUE), +# min_K = min(K, na.rm = TRUE), +# sd_K = sd(K, na.rm = TRUE), +# mean_r = mean(r, na.rm = TRUE), +# median_r = median(r, na.rm = TRUE), +# max_r = max(r, na.rm = TRUE), +# min_r = min(r, na.rm = TRUE), +# sd_r = sd(r, na.rm = TRUE), +# mean_AUC = mean(AUC, na.rm = TRUE), +# median_AUC = median(AUC, na.rm = TRUE), +# max_AUC = max(AUC, na.rm = TRUE), +# min_AUC = min(AUC, na.rm = TRUE), +# sd_AUC = sd(AUC, na.rm = TRUE), +# NG = sum(NG, na.rm = TRUE), +# DB = sum(DB, na.rm = TRUE), +# SM = sum(SM, na.rm = TRUE) +# ) - # Create L statistics scatter plot - plot_l_stats <- ggplot(df2_rf, aes(conc_num_factor, L)) + - geom_point(position = "jitter", size = 1) + - stat_summary( - fun = mean, - fun.min = ~ mean(.) - sd(.), - fun.max = ~ mean(.) + sd(.), - geom = "errorbar", color = "red" - ) + - stat_summary(fun = mean, geom = "point", color = "red") + - scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) + - coord_cartesian(ylim = c(0, 160)) + - annotate("text", x = -0.25, y = 10, label = "NG") + - annotate("text", x = -0.25, y = 5, label = "DB") + - annotate("text", x = -0.25, y = 0, label = "SM") + - annotate("text", x = unique(df2_rf$conc_num_factor), y = 10, label = df_stats_rf$NG) + - annotate("text", x = unique(df2_rf$conc_num_factor), y = 5, label = df_stats_rf$DB) + - annotate("text", x = unique(df2_rf$conc_num_factor), y = 0, label = df_stats_rf$SM) + - theme_Publication() +# # Create L statistics scatter plot +# plot_l_stats <- ggplot(df2_rf, aes(conc_num_factor, L)) + +# geom_point(position = "jitter", size = 1) + +# stat_summary( +# fun = mean, +# fun.min = ~ mean(.) - sd(.), +# fun.max = ~ mean(.) + sd(.), +# geom = "errorbar", color = "red" +# ) + +# stat_summary(fun = mean, geom = "point", color = "red") + +# scale_x_continuous(name = unique(df$Drug[1]), +# breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + +# ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) + +# coord_cartesian(ylim = c(0, 160)) + +# annotate("text", x = -0.25, y = 10, label = "NG") + +# annotate("text", x = -0.25, y = 5, label = "DB") + +# annotate("text", x = -0.25, y = 0, label = "SM") + +# annotate("text", x = unique(df2_rf$conc_num_factor), y = 10, label = df_stats_rf$NG) + +# annotate("text", x = unique(df2_rf$conc_num_factor), y = 5, label = df_stats_rf$DB) + +# annotate("text", x = unique(df2_rf$conc_num_factor), y = 0, label = df_stats_rf$SM) + +# theme_Publication() - # Create K statistics scatter plot - plot_k_stats <- ggplot(df2_rf, aes(conc_num_factor, K)) + - geom_point(position = "jitter", size = 1) + - stat_summary( - fun = mean, - fun.min = ~ mean(.) - sd(.), - fun.max = ~ mean(.) + sd(.), - geom = "errorbar", color = "red" - ) + - stat_summary(fun = mean, geom = "point", color = "red") + - scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) + - coord_cartesian(ylim = c(-20, 160)) + - annotate("text", x = -0.25, y = -5, label = "NG") + - annotate("text", x = -0.25, y = -12.5, label = "DB") + - annotate("text", x = -0.25, y = -20, label = "SM") + - annotate("text", x = unique(df2_rf$conc_num_factor), y = -5, label = df_stats_rf$NG) + - annotate("text", x = unique(df2_rf$conc_num_factor), y = -12.5, label = df_stats_rf$DB) + - annotate("text", x = unique(df2_rf$conc_num_factor), y = -20, label = df_stats_rf$SM) + - theme_Publication() +# # Create K statistics scatter plot +# plot_k_stats <- ggplot(df2_rf, aes(conc_num_factor, K)) + +# geom_point(position = "jitter", size = 1) + +# stat_summary( +# fun = mean, +# fun.min = ~ mean(.) - sd(.), +# fun.max = ~ mean(.) + sd(.), +# geom = "errorbar", color = "red" +# ) + +# stat_summary(fun = mean, geom = "point", color = "red") + +# scale_x_continuous(name = unique(df$Drug[1]), +# breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + +# ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) + +# coord_cartesian(ylim = c(-20, 160)) + +# annotate("text", x = -0.25, y = -5, label = "NG") + +# annotate("text", x = -0.25, y = -12.5, label = "DB") + +# annotate("text", x = -0.25, y = -20, label = "SM") + +# annotate("text", x = unique(df2_rf$conc_num_factor), y = -5, label = df_stats_rf$NG) + +# annotate("text", x = unique(df2_rf$conc_num_factor), y = -12.5, label = df_stats_rf$DB) + +# annotate("text", x = unique(df2_rf$conc_num_factor), y = -20, label = df_stats_rf$SM) + +# theme_Publication() - # Create r statistics scatter plot - plot_r_stats <- ggplot(df2_rf, aes(conc_num_factor, r)) + - geom_point(position = "jitter", size = 1) + - stat_summary( - fun = mean, - fun.min = ~ mean(.) - sd(.), - fun.max = ~ mean(.) + sd(.), - geom = "errorbar", color = "red" - ) + - stat_summary(fun = mean, geom = "point", color = "red") + - scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) + - coord_cartesian(ylim = c(0, 1)) + - annotate("text", x = -0.25, y = .9, label = "NG") + - annotate("text", x = -0.25, y = .8, label = "DB") + - annotate("text", x = -0.25, y = .7, label = "SM") + - annotate("text", x = unique(df2_rf$conc_num_factor), y = .9, label = df_stats_rf$NG) + - annotate("text", x = unique(df2_rf$conc_num_factor), y = .8, label = df_stats_rf$DB) + - annotate("text", x = unique(df2_rf$conc_num_factor), y = .7, label = df_stats_rf$SM) + - theme_Publication() +# # Create r statistics scatter plot +# plot_r_stats <- ggplot(df2_rf, aes(conc_num_factor, r)) + +# geom_point(position = "jitter", size = 1) + +# stat_summary( +# fun = mean, +# fun.min = ~ mean(.) - sd(.), +# fun.max = ~ mean(.) + sd(.), +# geom = "errorbar", color = "red" +# ) + +# stat_summary(fun = mean, geom = "point", color = "red") + +# scale_x_continuous(name = unique(df$Drug[1]), +# breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + +# ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) + +# coord_cartesian(ylim = c(0, 1)) + +# annotate("text", x = -0.25, y = .9, label = "NG") + +# annotate("text", x = -0.25, y = .8, label = "DB") + +# annotate("text", x = -0.25, y = .7, label = "SM") + +# annotate("text", x = unique(df2_rf$conc_num_factor), y = .9, label = df_stats_rf$NG) + +# annotate("text", x = unique(df2_rf$conc_num_factor), y = .8, label = df_stats_rf$DB) + +# annotate("text", x = unique(df2_rf$conc_num_factor), y = .7, label = df_stats_rf$SM) + +# theme_Publication() - # Create AUC statistics scatter plot - plot_auc_stats <- ggplot(df2_rf, aes(conc_num_factor, AUC)) + - geom_point(position = "jitter", size = 1) + - stat_summary( - fun = mean, - fun.min = ~ mean(.) - sd(.), - fun.max = ~ mean(.) + sd(.), - geom = "errorbar", color = "red" - ) + - stat_summary(fun = mean, geom = "point", color = "red") + - scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) + - coord_cartesian(ylim = c(0, 12500)) + - annotate("text", x = -0.25, y = 11000, label = "NG") + - annotate("text", x = -0.25, y = 10000, label = "DB") + - annotate("text", x = -0.25, y = 9000, label = "SM") + - annotate("text", x = unique(df2_rf$conc_num_factor), y = 11000, label = df_stats_rf$NG) + - annotate("text", x = unique(df2_rf$conc_num_factor), y = 10000, label = df_stats_rf$DB) + - annotate("text", x = unique(df2_rf$conc_num_factor), y = 9000, label = df_stats_rf$SM) + - theme_Publication() +# # Create AUC statistics scatter plot +# plot_auc_stats <- ggplot(df2_rf, aes(conc_num_factor, AUC)) + +# geom_point(position = "jitter", size = 1) + +# stat_summary( +# fun = mean, +# fun.min = ~ mean(.) - sd(.), +# fun.max = ~ mean(.) + sd(.), +# geom = "errorbar", color = "red" +# ) + +# stat_summary(fun = mean, geom = "point", color = "red") + +# scale_x_continuous(name = unique(df$Drug[1]), +# breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + +# ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) + +# coord_cartesian(ylim = c(0, 12500)) + +# annotate("text", x = -0.25, y = 11000, label = "NG") + +# annotate("text", x = -0.25, y = 10000, label = "DB") + +# annotate("text", x = -0.25, y = 9000, label = "SM") + +# annotate("text", x = unique(df2_rf$conc_num_factor), y = 11000, label = df_stats_rf$NG) + +# annotate("text", x = unique(df2_rf$conc_num_factor), y = 10000, label = df_stats_rf$DB) + +# annotate("text", x = unique(df2_rf$conc_num_factor), y = 9000, label = df_stats_rf$SM) + +# theme_Publication() - # Arrange and plot scatter plots - grid.arrange(plot_l_stats, plot_k_stats, plot_r_stats, plot_auc_stats, ncol = 2, nrow = 2) +# # Arrange and plot scatter plots +# grid.arrange(plot_l_stats, plot_k_stats, plot_r_stats, plot_auc_stats, ncol = 2, nrow = 2) - # Create box plots for each statistic - plot_l_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), L)) + - geom_boxplot() + - scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) + - coord_cartesian(ylim = c(0, 160)) + - theme_Publication() +# # Create box plots for each statistic +# plot_l_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), L)) + +# geom_boxplot() + +# scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + +# ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) + +# coord_cartesian(ylim = c(0, 160)) + +# theme_Publication() - plot_k_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), K)) + - geom_boxplot() + - scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) + - coord_cartesian(ylim = c(0, 130)) + - theme_Publication() +# plot_k_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), K)) + +# geom_boxplot() + +# scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + +# ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) + +# coord_cartesian(ylim = c(0, 130)) + +# theme_Publication() - plot_r_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), r)) + - geom_boxplot() + - scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) + - coord_cartesian(ylim = c(0, 1)) + - theme_Publication() +# plot_r_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), r)) + +# geom_boxplot() + +# scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + +# ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) + +# coord_cartesian(ylim = c(0, 1)) + +# theme_Publication() - plot_auc_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), AUC)) + - geom_boxplot() + - scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) + - coord_cartesian(ylim = c(0, 12500)) + - theme_Publication() +# plot_auc_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), AUC)) + +# geom_boxplot() + +# scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + +# ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) + +# coord_cartesian(ylim = c(0, 12500)) + +# theme_Publication() - # Arrange and plot box plots - grid.arrange(plot_l_stats_box, plot_k_stats_box, plot_r_stats_box, plot_auc_stats_box, ncol = 2, nrow = 2) +# # Arrange and plot box plots +# grid.arrange(plot_l_stats_box, plot_k_stats_box, plot_r_stats_box, plot_auc_stats_box, ncol = 2, nrow = 2) - # Loop to arrange and print combined plots - plot_indices <- seq(1, (num_genes - 1), by = 3) - for (m in seq_along(plot_indices)) { - grid.arrange( - p_l[[plot_indices[m]]], p_k[[plot_indices[m]]], p_r[[plot_indices[m]]], p_auc[[plot_indices[m]]], - p_l[[plot_indices[m] + 1]], p_k[[plot_indices[m] + 1]], p_r[[plot_indices[m] + 1]], p_auc[[plot_indices[m] + 1]], - p_l[[plot_indices[m] + 2]], p_k[[plot_indices[m] + 2]], p_r[[plot_indices[m] + 2]], p_auc[[plot_indices[m] + 2]], - ncol = 4, nrow = 3 - ) - } +# # Loop to arrange and print combined plots +# plot_indices <- seq(1, (num_genes - 1), by = 3) +# for (m in seq_along(plot_indices)) { +# grid.arrange( +# p_l[[plot_indices[m]]], p_k[[plot_indices[m]]], p_r[[plot_indices[m]]], p_auc[[plot_indices[m]]], +# p_l[[plot_indices[m] + 1]], p_k[[plot_indices[m] + 1]], p_r[[plot_indices[m] + 1]], p_auc[[plot_indices[m] + 1]], +# p_l[[plot_indices[m] + 2]], p_k[[plot_indices[m] + 2]], p_r[[plot_indices[m] + 2]], p_auc[[plot_indices[m] + 2]], +# ncol = 4, nrow = 3 +# ) +# } - # Handle leftover plots if num_genes is not a multiple of 3 - remaining_plots <- num_genes - max(plot_indices + 2) - if (remaining_plots > 0) { - plot_grid_list <- lapply(seq_len(remaining_plots), function(i) { - list(p_l[[plot_indices[length(plot_indices)] + i]], p_k[[plot_indices[length(plot_indices)] + i]], p_r[[plot_indices[length(plot_indices)] + i]], p_auc[[plot_indices[length(plot_indices)] + i]]) - }) - do.call(grid.arrange, c(plot_grid_list, list(ncol = 4, nrow = 3))) - } +# # Handle leftover plots if num_genes is not a multiple of 3 +# remaining_plots <- num_genes - max(plot_indices + 2) +# if (remaining_plots > 0) { +# plot_grid_list <- lapply(seq_len(remaining_plots), function(i) { +# list(p_l[[plot_indices[length(plot_indices)] + i]], +# p_k[[plot_indices[length(plot_indices)] + i]], +# p_r[[plot_indices[length(plot_indices)] + i]], +# p_auc[[plot_indices[length(plot_indices)] + i]]) +# }) +# do.call(grid.arrange, c(plot_grid_list, list(ncol = 4, nrow = 3))) +# } - dev.off() +# dev.off() - # Additional PDF output for RF interaction plots - # Generate PDF for RF interaction plots - pdf(file.path(output_dir, "RF_InteractionPlots.pdf"), width = 16, height = 16, onefile = TRUE) +# # Additional PDF output for RF interaction plots +# # Generate PDF for RF interaction plots +# pdf(file.path(output_dir, "RF_InteractionPlots.pdf"), width = 16, height = 16, onefile = TRUE) - # Summarize stats for RF data - df_stats_rf <- df2_rf %>% - group_by(conc_num, conc_num_factor) %>% - summarise( - mean_L = mean(L, na.rm = TRUE), - median_L = median(L, na.rm = TRUE), - max_L = max(L, na.rm = TRUE), - min_L = min(L, na.rm = TRUE), - sd_L = sd(L, na.rm = TRUE), - mean_K = mean(K, na.rm = TRUE), - median_K = median(K, na.rm = TRUE), - max_K = max(K, na.rm = TRUE), - min_K = min(K, na.rm = TRUE), - sd_K = sd(K, na.rm = TRUE), - mean_r = mean(r, na.rm = TRUE), - median_r = median(r, na.rm = TRUE), - max_r = max(r, na.rm = TRUE), - min_r = min(r, na.rm = TRUE), - sd_r = sd(r, na.rm = TRUE), - mean_AUC = mean(AUC, na.rm = TRUE), - median_AUC = median(AUC, na.rm = TRUE), - max_AUC = max(AUC, na.rm = TRUE), - min_AUC = min(AUC, na.rm = TRUE), - sd_AUC = sd(AUC, na.rm = TRUE), - NG = sum(NG, na.rm = TRUE), - DB = sum(DB, na.rm = TRUE), - SM = sum(SM, na.rm = TRUE) - ) +# # Summarize stats for RF data +# df_stats_rf <- df2_rf %>% +# group_by(conc_num, conc_num_factor) %>% +# summarise( +# mean_L = mean(L, na.rm = TRUE), +# median_L = median(L, na.rm = TRUE), +# max_L = max(L, na.rm = TRUE), +# min_L = min(L, na.rm = TRUE), +# sd_L = sd(L, na.rm = TRUE), +# mean_K = mean(K, na.rm = TRUE), +# median_K = median(K, na.rm = TRUE), +# max_K = max(K, na.rm = TRUE), +# min_K = min(K, na.rm = TRUE), +# sd_K = sd(K, na.rm = TRUE), +# mean_r = mean(r, na.rm = TRUE), +# median_r = median(r, na.rm = TRUE), +# max_r = max(r, na.rm = TRUE), +# min_r = min(r, na.rm = TRUE), +# sd_r = sd(r, na.rm = TRUE), +# mean_AUC = mean(AUC, na.rm = TRUE), +# median_AUC = median(AUC, na.rm = TRUE), +# max_AUC = max(AUC, na.rm = TRUE), +# min_AUC = min(AUC, na.rm = TRUE), +# sd_AUC = sd(AUC, na.rm = TRUE), +# NG = sum(NG, na.rm = TRUE), +# DB = sum(DB, na.rm = TRUE), +# SM = sum(SM, na.rm = TRUE) +# ) - # Create L statistics scatter plot for RF data - plot_rf_l_stats <- ggplot(df2_rf, aes(conc_num_factor, L)) + - geom_point(position = "jitter", size = 1) + - stat_summary( - fun = mean, - fun.min = ~ mean(.) - sd(.), - fun.max = ~ mean(.) + sd(.), - geom = "errorbar", color = "red" - ) + - stat_summary(fun = mean, geom = "point", color = "red") + - scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) + - coord_cartesian(ylim = c(0, 130)) + - annotate("text", x = -0.25, y = 10, label = "NG") + - annotate("text", x = -0.25, y = 5, label = "DB") + - annotate("text", x = -0.25, y = 0, label = "SM") + - annotate("text", x = unique(df2_rf$conc_num_factor), y = 10, label = df_stats_rf$NG) + - annotate("text", x = unique(df2_rf$conc_num_factor), y = 5, label = df_stats_rf$DB) + - annotate("text", x = unique(df2_rf$conc_num_factor), y = 0, label = df_stats_rf$SM) + - theme_Publication() +# # Create L statistics scatter plot for RF data +# plot_rf_l_stats <- ggplot(df2_rf, aes(conc_num_factor, L)) + +# geom_point(position = "jitter", size = 1) + +# stat_summary( +# fun = mean, +# fun.min = ~ mean(.) - sd(.), +# fun.max = ~ mean(.) + sd(.), +# geom = "errorbar", color = "red" +# ) + +# stat_summary(fun = mean, geom = "point", color = "red") + +# scale_x_continuous(name = unique(df$Drug[1]), +# breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + +# ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) + +# coord_cartesian(ylim = c(0, 130)) + +# annotate("text", x = -0.25, y = 10, label = "NG") + +# annotate("text", x = -0.25, y = 5, label = "DB") + +# annotate("text", x = -0.25, y = 0, label = "SM") + +# annotate("text", x = unique(df2_rf$conc_num_factor), y = 10, label = df_stats_rf$NG) + +# annotate("text", x = unique(df2_rf$conc_num_factor), y = 5, label = df_stats_rf$DB) + +# annotate("text", x = unique(df2_rf$conc_num_factor), y = 0, label = df_stats_rf$SM) + +# theme_Publication() - # Create K statistics scatter plot for RF data - plot_rf_k_stats <- ggplot(df2_rf, aes(conc_num_factor, K)) + - geom_point(position = "jitter", size = 1) + - stat_summary( - fun = mean, - fun.min = ~ mean(.) - sd(.), - fun.max = ~ mean(.) + sd(.), - geom = "errorbar", color = "red" - ) + - stat_summary(fun = mean, geom = "point", color = "red") + - scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) + - coord_cartesian(ylim = c(-20, 160)) + - annotate("text", x = -0.25, y = -5, label = "NG") + - annotate("text", x = -0.25, y = -12.5, label = "DB") + - annotate("text", x = -0.25, y = -20, label = "SM") + - annotate("text", x = unique(df2_rf$conc_num_factor), y = -5, label = df_stats_rf$NG) + - annotate("text", x = unique(df2_rf$conc_num_factor), y = -12.5, label = df_stats_rf$DB) + - annotate("text", x = unique(df2_rf$conc_num_factor), y = -20, label = df_stats_rf$SM) + - theme_Publication() +# # Create K statistics scatter plot for RF data +# plot_rf_k_stats <- ggplot(df2_rf, aes(conc_num_factor, K)) + +# geom_point(position = "jitter", size = 1) + +# stat_summary( +# fun = mean, +# fun.min = ~ mean(.) - sd(.), +# fun.max = ~ mean(.) + sd(.), +# geom = "errorbar", color = "red" +# ) + +# stat_summary(fun = mean, geom = "point", color = "red") + +# scale_x_continuous(name = unique(df$Drug[1]), +# breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + +# ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) + +# coord_cartesian(ylim = c(-20, 160)) + +# annotate("text", x = -0.25, y = -5, label = "NG") + +# annotate("text", x = -0.25, y = -12.5, label = "DB") + +# annotate("text", x = -0.25, y = -20, label = "SM") + +# annotate("text", x = unique(df2_rf$conc_num_factor), y = -5, label = df_stats_rf$NG) + +# annotate("text", x = unique(df2_rf$conc_num_factor), y = -12.5, label = df_stats_rf$DB) + +# annotate("text", x = unique(df2_rf$conc_num_factor), y = -20, label = df_stats_rf$SM) + +# theme_Publication() - # Create r statistics scatter plot for RF data - plot_rf_r_stats <- ggplot(df2_rf, aes(conc_num_factor, r)) + - geom_point(position = "jitter", size = 1) + - stat_summary( - fun = mean, - fun.min = ~ mean(.) - sd(.), - fun.max = ~ mean(.) + sd(.), - geom = "errorbar", color = "red" - ) + - stat_summary(fun = mean, geom = "point", color = "red") + - scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) + - coord_cartesian(ylim = c(0, 1)) + - annotate("text", x = -0.25, y = .9, label = "NG") + - annotate("text", x = -0.25, y = .8, label = "DB") + - annotate("text", x = -0.25, y = .7, label = "SM") + - annotate("text", x = unique(df2_rf$conc_num_factor), y = .9, label = df_stats_rf$NG) + - annotate("text", x = unique(df2_rf$conc_num_factor), y = .8, label = df_stats_rf$DB) + - annotate("text", x = unique(df2_rf$conc_num_factor), y = .7, label = df_stats_rf$SM) + - theme_Publication() +# # Create r statistics scatter plot for RF data +# plot_rf_r_stats <- ggplot(df2_rf, aes(conc_num_factor, r)) + +# geom_point(position = "jitter", size = 1) + +# stat_summary( +# fun = mean, +# fun.min = ~ mean(.) - sd(.), +# fun.max = ~ mean(.) + sd(.), +# geom = "errorbar", color = "red" +# ) + +# stat_summary(fun = mean, geom = "point", color = "red") + +# scale_x_continuous(name = unique(df$Drug[1]), +# breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + +# ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) + +# coord_cartesian(ylim = c(0, 1)) + +# annotate("text", x = -0.25, y = .9, label = "NG") + +# annotate("text", x = -0.25, y = .8, label = "DB") + +# annotate("text", x = -0.25, y = .7, label = "SM") + +# annotate("text", x = unique(df2_rf$conc_num_factor), y = .9, label = df_stats_rf$NG) + +# annotate("text", x = unique(df2_rf$conc_num_factor), y = .8, label = df_stats_rf$DB) + +# annotate("text", x = unique(df2_rf$conc_num_factor), y = .7, label = df_stats_rf$SM) + +# theme_Publication() - # Create AUC statistics scatter plot for RF data - plot_rf_auc_stats <- ggplot(df2_rf, aes(conc_num_factor, AUC)) + - geom_point(position = "jitter", size = 1) + - stat_summary( - fun = mean, - fun.min = ~ mean(.) - sd(.), - fun.max = ~ mean(.) + sd(.), - geom = "errorbar", color = "red" - ) + - stat_summary(fun = mean, geom = "point", color = "red") + - scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) + - coord_cartesian(ylim = c(0, 12500)) + - annotate("text", x = -0.25, y = 11000, label = "NG") + - annotate("text", x = -0.25, y = 10000, label = "DB") + - annotate("text", x = -0.25, y = 9000, label = "SM") + - annotate("text", x = unique(df2_rf$conc_num_factor), y = 11000, label = df_stats_rf$NG) + - annotate("text", x = unique(df2_rf$conc_num_factor), y = 10000, label = df_stats_rf$DB) + - annotate("text", x = unique(df2_rf$conc_num_factor), y = 9000, label = df_stats_rf$SM) + - theme_Publication() +# # Create AUC statistics scatter plot for RF data +# plot_rf_auc_stats <- ggplot(df2_rf, aes(conc_num_factor, AUC)) + +# geom_point(position = "jitter", size = 1) + +# stat_summary( +# fun = mean, +# fun.min = ~ mean(.) - sd(.), +# fun.max = ~ mean(.) + sd(.), +# geom = "errorbar", color = "red" +# ) + +# stat_summary(fun = mean, geom = "point", color = "red") + +# scale_x_continuous(name = unique(df$Drug[1]), +# breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + +# ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) + +# coord_cartesian(ylim = c(0, 12500)) + +# annotate("text", x = -0.25, y = 11000, label = "NG") + +# annotate("text", x = -0.25, y = 10000, label = "DB") + +# annotate("text", x = -0.25, y = 9000, label = "SM") + +# annotate("text", x = unique(df2_rf$conc_num_factor), y = 11000, label = df_stats_rf$NG) + +# annotate("text", x = unique(df2_rf$conc_num_factor), y = 10000, label = df_stats_rf$DB) + +# annotate("text", x = unique(df2_rf$conc_num_factor), y = 9000, label = df_stats_rf$SM) + +# theme_Publication() - # Arrange and plot RF scatter plots - grid.arrange(plot_rf_l_stats, plot_rf_k_stats, plot_rf_r_stats, plot_rf_auc_stats, ncol = 2, nrow = 2) +# # Arrange and plot RF scatter plots +# grid.arrange(plot_rf_l_stats, plot_rf_k_stats, plot_rf_r_stats, plot_rf_auc_stats, ncol = 2, nrow = 2) - # Create box plots for each RF statistic - plot_rf_l_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), L)) + - geom_boxplot() + - scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) + - coord_cartesian(ylim = c(0, 130)) + - theme_Publication() +# # Create box plots for each RF statistic +# plot_rf_l_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), L)) + +# geom_boxplot() + +# scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + +# ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) + +# coord_cartesian(ylim = c(0, 130)) + +# theme_Publication() - plot_rf_k_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), K)) + - geom_boxplot() + - scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) + - coord_cartesian(ylim = c(0, 160)) + - theme_Publication() +# plot_rf_k_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), K)) + +# geom_boxplot() + +# scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + +# ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) + +# coord_cartesian(ylim = c(0, 160)) + +# theme_Publication() - plot_rf_r_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), r)) + - geom_boxplot() + - scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) + - coord_cartesian(ylim = c(0, 1)) + - theme_Publication() +# plot_rf_r_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), r)) + +# geom_boxplot() + +# scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + +# ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) + +# coord_cartesian(ylim = c(0, 1)) + +# theme_Publication() - plot_rf_auc_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), AUC)) + - geom_boxplot() + - scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + - ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) + - coord_cartesian(ylim = c(0, 12500)) + - theme_Publication() +# plot_rf_auc_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), AUC)) + +# geom_boxplot() + +# scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + +# ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) + +# coord_cartesian(ylim = c(0, 12500)) + +# theme_Publication() - # Arrange and plot RF box plots - grid.arrange(plot_rf_l_stats_box, plot_rf_k_stats_box, plot_rf_r_stats_box, plot_rf_auc_stats_box, ncol = 2, nrow = 2) +# # Arrange and plot RF box plots +# grid.arrange(plot_rf_l_stats_box, plot_rf_k_stats_box, plot_rf_r_stats_box, plot_rf_auc_stats_box, ncol = 2, nrow = 2) - # Loop to arrange and print combined RF plots - plot_indices_rf <- seq(1, (num_genes_RF - 1), by = 3) - for (m in seq_along(plot_indices_rf)) { - grid.arrange( - p_rf_l[[plot_indices_rf[m]]], p_rf_k[[plot_indices_rf[m]]], p_rf_r[[plot_indices_rf[m]]], p_rf_auc[[plot_indices_rf[m]]], - p_rf_l[[plot_indices_rf[m] + 1]], p_rf_k[[plot_indices_rf[m] + 1]], p_rf_r[[plot_indices_rf[m] + 1]], p_rf_auc[[plot_indices_rf[m] + 1]], - p_rf_l[[plot_indices_rf[m] + 2]], p_rf_k[[plot_indices_rf[m] + 2]], p_rf_r[[plot_indices_rf[m] + 2]], p_rf_auc[[plot_indices_rf[m] + 2]], - ncol = 4, nrow = 3 - ) - } +# # Loop to arrange and print combined RF plots +# plot_indices_rf <- seq(1, (num_genes_RF - 1), by = 3) +# for (m in seq_along(plot_indices_rf)) { +# grid.arrange( +# p_rf_l[[plot_indices_rf[m]]], p_rf_k[[plot_indices_rf[m]]], p_rf_r[[plot_indices_rf[m]]], p_rf_auc[[plot_indices_rf[m]]], +# p_rf_l[[plot_indices_rf[m] + 1]], p_rf_k[[plot_indices_rf[m] + 1]], +# p_rf_r[[plot_indices_rf[m] + 1]], p_rf_auc[[plot_indices_rf[m] + 1]], +# p_rf_l[[plot_indices_rf[m] + 2]], p_rf_k[[plot_indices_rf[m] + 2]], +# p_rf_r[[plot_indices_rf[m] + 2]], p_rf_auc[[plot_indices_rf[m] + 2]], +# ncol = 4, nrow = 3 +# ) +# } - # Handle leftover RF plots if num_genes_RF is not a multiple of 3 - remaining_rf_plots <- num_genes_RF - max(plot_indices_rf + 2) - if (remaining_rf_plots > 0) { - plot_grid_rf_list <- lapply(seq_len(remaining_rf_plots), function(i) { - list(p_rf_l[[plot_indices_rf[length(plot_indices_rf)] + i]], p_rf_k[[plot_indices_rf[length(plot_indices_rf)] + i]], p_rf_r[[plot_indices_rf[length(plot_indices_rf)] + i]], p_rf_auc[[plot_indices_rf[length(plot_indices_rf)] + i]]) - }) - do.call(grid.arrange, c(plot_grid_rf_list, list(ncol = 4, nrow = 3))) - } - dev.off() -} +# # Handle leftover RF plots if num_genes_RF is not a multiple of 3 +# remaining_rf_plots <- num_genes_RF - max(plot_indices_rf + 2) +# if (remaining_rf_plots > 0) { +# plot_grid_rf_list <- lapply(seq_len(remaining_rf_plots), function(i) { +# list(p_rf_l[[plot_indices_rf[length(plot_indices_rf)] + i]], p_rf_k[[plot_indices_rf[length(plot_indices_rf)] + i]], +# p_rf_r[[plot_indices_rf[length(plot_indices_rf)] + i]], p_rf_auc[[plot_indices_rf[length(plot_indices_rf)] + i]]) +# }) +# do.call(grid.arrange, c(plot_grid_rf_list, list(ncol = 4, nrow = 3))) +# } +# dev.off() +# } -# Calculate linear models and R-squared values for all CPPs in results 1 vs results 2 -lm_list <- list( - lm(Z_lm_K ~ Z_lm_L, data = df_na_rm), - lm(Z_lm_r ~ Z_lm_L, data = df_na_rm), - lm(Z_lm_AUC ~ Z_lm_L, data = df_na_rm), - lm(Z_lm_r ~ Z_lm_K, data = df_na_rm), - lm(Z_lm_AUC ~ Z_lm_K, data = df_na_rm), - lm(Z_lm_AUC ~ Z_lm_r, data = df_na_rm) -) +# # Calculate linear models and R-squared values for all CPPs in results 1 vs results 2 +# lm_list <- list( +# lm(Z_lm_K ~ Z_lm_L, data = df_na_rm), +# lm(Z_lm_r ~ Z_lm_L, data = df_na_rm), +# lm(Z_lm_AUC ~ Z_lm_L, data = df_na_rm), +# lm(Z_lm_r ~ Z_lm_K, data = df_na_rm), +# lm(Z_lm_AUC ~ Z_lm_K, data = df_na_rm), +# lm(Z_lm_AUC ~ Z_lm_r, data = df_na_rm) +# ) -lm_summaries <- lapply(lm_list, summary) +# lm_summaries <- lapply(lm_list, summary) -# Create PDF for correlation plots of CPPs -pdf(file.path(output_dir, "Correlation_CPPs.pdf"), width = 10, height = 7, onefile = TRUE) +# # Create PDF for correlation plots of CPPs +# pdf(file.path(output_dir, "Correlation_CPPs.pdf"), width = 10, height = 7, onefile = TRUE) -# Generate correlation plots for each combination -plot_list <- list( - ggplot(df_na_rm, aes(Z_lm_L, Z_lm_K)) + - geom_point(shape = 3, color = "gray70") + - geom_smooth(method = "lm", color = "tomato3") + - ggtitle("Interaction L vs. Interaction K") + - xlab("z-score L") + ylab("z-score K") + - annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[1]]$r.squared, 3))) + - theme_Publication_legend_right() + - theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)), +# # Generate correlation plots for each combination +# plot_list <- list( +# ggplot(df_na_rm, aes(Z_lm_L, Z_lm_K)) + +# geom_point(shape = 3, color = "gray70") + +# geom_smooth(method = "lm", color = "tomato3") + +# ggtitle("Interaction L vs. Interaction K") + +# xlab("z-score L") + ylab("z-score K") + +# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[1]]$r.squared, 3))) + +# theme_Publication_legend_right() + +# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), +# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), +# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)), - ggplot(df_na_rm, aes(Z_lm_L, Z_lm_r)) + - geom_point(shape = 3, color = "gray70") + - geom_smooth(method = "lm", color = "tomato3") + - ggtitle("Interaction L vs. Interaction r") + - xlab("z-score L") + ylab("z-score r") + - annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[2]]$r.squared, 3))) + - theme_Publication_legend_right() + - theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)), +# ggplot(df_na_rm, aes(Z_lm_L, Z_lm_r)) + +# geom_point(shape = 3, color = "gray70") + +# geom_smooth(method = "lm", color = "tomato3") + +# ggtitle("Interaction L vs. Interaction r") + +# xlab("z-score L") + ylab("z-score r") + +# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[2]]$r.squared, 3))) + +# theme_Publication_legend_right() + +# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), +# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), +# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)), - ggplot(df_na_rm, aes(Z_lm_L, Z_lm_AUC)) + - geom_point(shape = 3, color = "gray70") + - geom_smooth(method = "lm", color = "tomato3") + - ggtitle("Interaction L vs. Interaction AUC") + - xlab("z-score L") + ylab("z-score AUC") + - annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[3]]$r.squared, 3))) + - theme_Publication_legend_right() + - theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)), +# ggplot(df_na_rm, aes(Z_lm_L, Z_lm_AUC)) + +# geom_point(shape = 3, color = "gray70") + +# geom_smooth(method = "lm", color = "tomato3") + +# ggtitle("Interaction L vs. Interaction AUC") + +# xlab("z-score L") + ylab("z-score AUC") + +# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[3]]$r.squared, 3))) + +# theme_Publication_legend_right() + +# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), +# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), +# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)), - ggplot(df_na_rm, aes(Z_lm_K, Z_lm_r)) + - geom_point(shape = 3, color = "gray70") + - geom_smooth(method = "lm", color = "tomato3") + - ggtitle("Interaction K vs. Interaction r") + - xlab("z-score K") + ylab("z-score r") + - annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[4]]$r.squared, 3))) + - theme_Publication_legend_right() + - theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)), +# ggplot(df_na_rm, aes(Z_lm_K, Z_lm_r)) + +# geom_point(shape = 3, color = "gray70") + +# geom_smooth(method = "lm", color = "tomato3") + +# ggtitle("Interaction K vs. Interaction r") + +# xlab("z-score K") + ylab("z-score r") + +# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[4]]$r.squared, 3))) + +# theme_Publication_legend_right() + +# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), +# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), +# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)), - ggplot(df_na_rm, aes(Z_lm_K, Z_lm_AUC)) + - geom_point(shape = 3, color = "gray70") + - geom_smooth(method = "lm", color = "tomato3") + - ggtitle("Interaction K vs. Interaction AUC") + - xlab("z-score K") + ylab("z-score AUC") + - annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[5]]$r.squared, 3))) + - theme_Publication_legend_right() + - theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)), +# ggplot(df_na_rm, aes(Z_lm_K, Z_lm_AUC)) + +# geom_point(shape = 3, color = "gray70") + +# geom_smooth(method = "lm", color = "tomato3") + +# ggtitle("Interaction K vs. Interaction AUC") + +# xlab("z-score K") + ylab("z-score AUC") + +# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[5]]$r.squared, 3))) + +# theme_Publication_legend_right() + +# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), +# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), +# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)), - ggplot(df_na_rm, aes(Z_lm_r, Z_lm_AUC)) + - geom_point(shape = 3, color = "gray70") + - geom_smooth(method = "lm", color = "tomato3") + - ggtitle("Interaction r vs. Interaction AUC") + - xlab("z-score r") + ylab("z-score AUC") + - annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[6]]$r.squared, 3))) + - theme_Publication_legend_right() + - theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)) -) +# ggplot(df_na_rm, aes(Z_lm_r, Z_lm_AUC)) + +# geom_point(shape = 3, color = "gray70") + +# geom_smooth(method = "lm", color = "tomato3") + +# ggtitle("Interaction r vs. Interaction AUC") + +# xlab("z-score r") + ylab("z-score AUC") + +# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[6]]$r.squared, 3))) + +# theme_Publication_legend_right() + +# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), +# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), +# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)) +# ) -# Print all correlation plots to the PDF -lapply(plot_list, print) +# # Print all correlation plots to the PDF +# lapply(plot_list, print) -# Create additional plots with InteractionScores_RF highlighted in cyan -interaction_scores_rf_filtered <- interaction_scores_rf[!is.na(interaction_scores_rf$Z_lm_L), ] +# # Create additional plots with InteractionScores_RF highlighted in cyan +# interaction_scores_rf_filtered <- interaction_scores_rf[!is.na(interaction_scores_rf$Z_lm_L), ] -highlighted_plot_list <- list( - ggplot(df_na_rm, aes(Z_lm_L, Z_lm_K)) + - geom_point(shape = 3, color = "gray70") + - geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_L, Z_lm_K), color = "cyan") + - ggtitle("Interaction L vs. Interaction K") + - xlab("z-score L") + ylab("z-score K") + - annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[1]]$r.squared, 3))) + - theme_Publication_legend_right() + - theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)), +# highlighted_plot_list <- list( +# ggplot(df_na_rm, aes(Z_lm_L, Z_lm_K)) + +# geom_point(shape = 3, color = "gray70") + +# geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_L, Z_lm_K), color = "cyan") + +# ggtitle("Interaction L vs. Interaction K") + +# xlab("z-score L") + ylab("z-score K") + +# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[1]]$r.squared, 3))) + +# theme_Publication_legend_right() + +# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), +# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), +# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)), - ggplot(df_na_rm, aes(Z_lm_L, Z_lm_r)) + - geom_point(shape = 3, color = "gray70") + - geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_L, Z_lm_r), color = "cyan") + - ggtitle("Interaction L vs. Interaction r") + - xlab("z-score L") + ylab("z-score r") + - annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[2]]$r.squared, 3))) + - theme_Publication_legend_right() + - theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)), +# ggplot(df_na_rm, aes(Z_lm_L, Z_lm_r)) + +# geom_point(shape = 3, color = "gray70") + +# geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_L, Z_lm_r), color = "cyan") + +# ggtitle("Interaction L vs. Interaction r") + +# xlab("z-score L") + ylab("z-score r") + +# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[2]]$r.squared, 3))) + +# theme_Publication_legend_right() + +# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), +# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), +# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)), - ggplot(df_na_rm, aes(Z_lm_L, Z_lm_AUC)) + - geom_point(shape = 3, color = "gray70") + - geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_L, Z_lm_AUC), color = "cyan") + - ggtitle("Interaction L vs. Interaction AUC") + - xlab("z-score L") + ylab("z-score AUC") + - annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[3]]$r.squared, 3))) + - theme_Publication_legend_right() + - theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)), +# ggplot(df_na_rm, aes(Z_lm_L, Z_lm_AUC)) + +# geom_point(shape = 3, color = "gray70") + +# geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_L, Z_lm_AUC), color = "cyan") + +# ggtitle("Interaction L vs. Interaction AUC") + +# xlab("z-score L") + ylab("z-score AUC") + +# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[3]]$r.squared, 3))) + +# theme_Publication_legend_right() + +# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), +# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), +# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)), - ggplot(df_na_rm, aes(Z_lm_K, Z_lm_r)) + - geom_point(shape = 3, color = "gray70") + - geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_K, Z_lm_r), color = "cyan") + - ggtitle("Interaction K vs. Interaction r") + - xlab("z-score K") + ylab("z-score r") + - annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[4]]$r.squared, 3))) + - theme_Publication_legend_right() + - theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)), +# ggplot(df_na_rm, aes(Z_lm_K, Z_lm_r)) + +# geom_point(shape = 3, color = "gray70") + +# geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_K, Z_lm_r), color = "cyan") + +# ggtitle("Interaction K vs. Interaction r") + +# xlab("z-score K") + ylab("z-score r") + +# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[4]]$r.squared, 3))) + +# theme_Publication_legend_right() + +# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), +# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), +# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)), - ggplot(df_na_rm, aes(Z_lm_K, Z_lm_AUC)) + - geom_point(shape = 3, color = "gray70") + - geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_K, Z_lm_AUC), color = "cyan") + - ggtitle("Interaction K vs. Interaction AUC") + - xlab("z-score K") + ylab("z-score AUC") + - annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[5]]$r.squared, 3))) + - theme_Publication_legend_right() + - theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)), +# ggplot(df_na_rm, aes(Z_lm_K, Z_lm_AUC)) + +# geom_point(shape = 3, color = "gray70") + +# geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_K, Z_lm_AUC), color = "cyan") + +# ggtitle("Interaction K vs. Interaction AUC") + +# xlab("z-score K") + ylab("z-score AUC") + +# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[5]]$r.squared, 3))) + +# theme_Publication_legend_right() + +# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), +# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), +# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)), - ggplot(df_na_rm, aes(Z_lm_r, Z_lm_AUC)) + - geom_point(shape = 3, color = "gray70") + - geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_r, Z_lm_AUC), color = "cyan") + - ggtitle("Interaction r vs. Interaction AUC") + - xlab("z-score r") + ylab("z-score AUC") + - annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[6]]$r.squared, 3))) + - theme_Publication_legend_right() + - theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)) -) +# ggplot(df_na_rm, aes(Z_lm_r, Z_lm_AUC)) + +# geom_point(shape = 3, color = "gray70") + +# geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_r, Z_lm_AUC), color = "cyan") + +# ggtitle("Interaction r vs. Interaction AUC") + +# xlab("z-score r") + ylab("z-score AUC") + +# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[6]]$r.squared, 3))) + +# theme_Publication_legend_right() + +# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), +# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), +# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)) +# ) -# Print all highlighted plots to the PDF -lapply(highlighted_plot_list, print) +# # Print all highlighted plots to the PDF +# lapply(highlighted_plot_list, print) -dev.off() +# dev.off()