diff --git a/workflow/apps/r/calculate_interaction_zscores5.R b/workflow/apps/r/calculate_interaction_zscores5.R new file mode 100644 index 00000000..ecb4816a --- /dev/null +++ b/workflow/apps/r/calculate_interaction_zscores5.R @@ -0,0 +1,1632 @@ +suppressMessages({ + library(ggplot2) + library(plotly) + library(htmlwidgets) + library(dplyr) + library(ggthemes) + library(data.table) +}) + +options(warn = 2, max.print = 1000) + +# Constants for configuration +plot_width <- 14 +plot_height <- 9 +base_size <- 14 + +parse_arguments <- function() { + args <- if (interactive()) { + c( + "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240116_jhartman2_DoxoHLD", + "/home/bryan/documents/develop/scripts/hartmanlab/workflow/apps/r/SGD_features.tab", + "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/easy/20240116_jhartman2_DoxoHLD/results_std.txt", + "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp1", + "Experiment 1: Doxo versus HLD", + 3, + "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp2", + "Experiment 2: HLD versus Doxo", + 3 + ) + } else { + commandArgs(trailingOnly = TRUE) + } + + # Extract paths, names, and standard deviations + paths <- args[seq(4, length(args), by = 3)] + names <- args[seq(5, length(args), by = 3)] + sds <- as.numeric(args[seq(6, length(args), by = 3)]) + + # Normalize paths + normalized_paths <- normalizePath(paths, mustWork = FALSE) + + # Create named list of experiments + experiments <- list() + for (i in seq_along(paths)) { + experiments[[names[i]]] <- list( + path = normalized_paths[i], + sd = sds[i] + ) + } + + list( + out_dir = normalizePath(args[1], mustWork = FALSE), + sgd_gene_list = normalizePath(args[2], mustWork = FALSE), + easy_results_file = normalizePath(args[3], mustWork = FALSE), + experiments = experiments + ) +} + +args <- parse_arguments() + +dir.create(file.path(args$out_dir, "zscores"), showWarnings = FALSE) +dir.create(file.path(args$out_dir, "zscores", "qc"), showWarnings = FALSE) + +# Define themes and scales +theme_publication <- function(base_size = base_size, base_family = "sans", legend_position = "bottom") { + theme_foundation(base_size = base_size, base_family = base_family) + + theme( + plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5), + text = element_text(), + panel.background = element_rect(colour = NA), + plot.background = element_rect(colour = NA), + panel.border = element_rect(colour = NA), + axis.title = element_text(face = "bold", size = rel(1)), + axis.title.y = element_text(angle = 90, vjust = 2), + axis.title.x = element_text(vjust = -0.2), + axis.line = element_line(colour = "black"), + panel.grid.major = element_line(colour = "#f0f0f0"), + panel.grid.minor = element_blank(), + legend.key = element_rect(colour = NA), + legend.position = legend_position, + legend.direction = ifelse(legend_position == "right", "vertical", "horizontal"), + plot.margin = unit(c(10, 5, 5, 5), "mm"), + strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"), + strip.text = element_text(face = "bold") + ) +} + +scale_fill_publication <- function(...) { + discrete_scale("fill", "Publication", manual_pal(values = c( + "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", + "#a6cee3", "#fb9a99", "#984ea3", "#ffff33" + )), ...) +} + +scale_colour_publication <- function(...) { + discrete_scale("colour", "Publication", manual_pal(values = c( + "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", + "#a6cee3", "#fb9a99", "#984ea3", "#ffff33" + )), ...) +} + +# Load SGD gene list +sgd_genes <- function(sgd_gene_list) { + read.delim(file = sgd_gene_list, quote = "", header = FALSE, + colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11))) %>% + dplyr::rename(ORF = V4, GeneName = V5) +} + +genes <- sgd_genes(args$sgd_gene_list) + +# Load the initial dataframe from the easy_results_file +load_and_process_data <- function(easy_results_file, sd = 3) { + df <- read.delim(easy_results_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE) + + # Clean and convert columns to numeric where appropriate + df <- df %>% + filter(!(pull(select(., 1)) %in% c("", "Scan"))) %>% + filter(!is.na(ORF) & ORF != "" & !Gene %in% c("BLANK", "Blank", "blank") & Drug != "BMH21") %>% + mutate( + Col = as.numeric(Col), + Row = as.numeric(Row), + L = as.numeric(l), + K = as.numeric(K), + r = as.numeric(r), + Scan = as.numeric(Scan), + AUC = as.numeric(AUC96), + last_bg = as.numeric(LstBackgrd), + first_bg = as.numeric(X1stBackgrd), + delta_bg = last_bg - first_bg, + delta_bg_tolerance = mean(delta_bg, na.rm = TRUE) + (sd * sd(delta_bg, na.rm = TRUE)), + NG = if_else(L == 0 & !is.na(L), 1, 0), + DB = if_else(delta_bg >= delta_bg_tolerance, 1, 0), + OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep), + conc_num = as.numeric(gsub("[^0-9\\.]", "", Conc)), + conc_num_factor = as.numeric(as.factor(conc_num)) - 1, + max_conc = max(conc_num_factor) + ) +} + +# Function to update Gene names using the SGD gene list +update_gene_names <- function(df, sgd_gene_list) { + genes <- read.delim(file = sgd_gene_list, + quote = "", header = FALSE, + colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11))) + + gene_map <- setNames(genes$V5, genes$V4) + + df <- df %>% + rowwise() %>% + mutate(Gene = ifelse(OrfRep != "YDL227C", gene_map[[ORF]], Gene)) %>% + ungroup() %>% + mutate(Gene = ifelse(Gene == "" | Gene == "OCT1", OrfRep, Gene)) +} + +generate_plot <- function(df, x_var, y_var = NULL, plot_type, color_var = "conc_num", title, x_label = NULL, y_label = NULL) { + plot <- ggplot(df, aes_string(x = x_var, y = y_var, color = color_var)) + + if (plot_type == "scatter") { + plot <- plot + geom_point(shape = 3, size = 0.2, position = "jitter") + + stat_summary(fun.data = mean_sdl, geom = "errorbar") + + stat_summary(fun = mean, geom = "point", size = 0.6) + } else if (plot_type == "box") { + plot <- plot + geom_boxplot() + } else if (plot_type == "density") { + plot <- plot + geom_density() + } else if (plot_type == "bar") { + plot <- plot + geom_bar(stat = "identity") + } + + plot <- plot + ggtitle(title) + theme_publication() + + if (!is.null(x_label)) plot <- plot + xlab(x_label) + if (!is.null(y_label)) plot <- plot + ylab(y_label) + + return(plot) +} + +generate_and_save_plots <- function(df, output_dir, prefix, variables, include_qc = FALSE) { + plots <- list() + + for (var in variables) { + scatter_plot <- generate_plot(df, "Scan", var, "scatter", title = paste(prefix, "Scatter Plot for", var)) + boxplot <- generate_plot(df, "Scan", var, "box", title = paste(prefix, "Box Plot for", var)) + + plots[[paste0(var, "_scatter")]] <- scatter_plot + plots[[paste0(var, "_box")]] <- boxplot + } + + if (include_qc) { + plots[["Raw_L_vs_K"]] <- generate_plot(df, "L", "K", "scatter", title = "Raw L vs K before QC") + plots[["Delta_bg_Density"]] <- generate_plot(df, "delta_bg", NULL, "density", title = "Density plot for Delta Background by Conc All Data") + plots[["Delta_bg_Bar"]] <- generate_plot(df, "delta_bg", NULL, "bar", title = "Bar plot for Delta Background by Conc All Data") + } + + # Save the generated plots using the unified function + save_plots(prefix, plots, output_dir) +} + +# Calculate summary statistics for all variables together +calculate_summary_stats <- function(df, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor")) { + summary_stats <- df %>% + group_by(across(all_of(group_vars))) %>% + summarise(across(all_of(variables), list( + mean = ~mean(.x, na.rm = TRUE), + median = ~median(.x, na.rm = TRUE), + max = ~max(.x, na.rm = TRUE), + min = ~min(.x, na.rm = TRUE), + sd = ~sd(.x, na.rm = TRUE), + se = ~sd(.x, na.rm = TRUE) / sqrt(n() - 1) + ), .names = "{.col}_{.fn}")) + + return(summary_stats) +} + +# Function to calculate L values within and outside 2SD of K +calculate_l_2sd_of_k <- function(df, df_stats_by_k) { + # Join the statistics to the main dataframe + df_joined <- df %>% + filter(!is.na(L)) %>% + left_join(df_stats_by_k, by = "conc_num_factor") + + # Filter data within 2SD and outside 2SD + df_within_2sd_k <- df_joined %>% + filter(K >= (mean_K - 2 * sd_K) & K <= (mean_K + 2 * sd_K)) + + df_outside_2sd_k <- df_joined %>% + filter(K < (mean_K - 2 * sd_K) | K > (mean_K + 2 * sd_K)) + + # Select relevant columns to avoid duplicated columns from the join + df_within_2sd_k <- df_within_2sd_k %>% select(names(df)) + df_outside_2sd_k <- df_outside_2sd_k %>% select(names(df)) + + list(df_within_2sd_k = df_within_2sd_k, df_outside_2sd_k = df_outside_2sd_k) +} + +# Ensure all plots are saved and printed to PDF +save_plots <- function(file_name, plot_list, output_dir) { + # Save to PDF + pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9) + lapply(plot_list, function(plot) { + print(plot) + }) + dev.off() + + # Save to HTML + lapply(names(plot_list), function(plot_name) { + saveWidget( + ggplotly(plot_list[[plot_name]]), + file = file.path(output_dir, + paste0(file_name, "_", plot_name, ".html")), + selfcontained = TRUE) + }) +} + +# Function to calculate background strain mean values +calculate_background_means <- function(df_stats_by_l, df_stats_by_k, df_stats_by_r, df_stats_by_auc) { + list( + L = df_stats_by_l %>% filter(conc_num_factor == 0) %>% pull(mean_L), + K = df_stats_by_k %>% filter(conc_num_factor == 0) %>% pull(mean_K), + r = df_stats_by_r %>% filter(conc_num_factor == 0) %>% pull(mean_r), + AUC = df_stats_by_auc %>% filter(conc_num_factor == 0) %>% pull(mean_AUC) + ) +} + +# Function to process strains (deletion and reference) + +process_strains <- function(df, df_stats_by_l_within_2sd_k, strain, output_dir, strain_type = "deletion") { + df_strains <- if (strain_type == "deletion") { + df %>% filter(OrfRep != strain) + } else { + df %>% filter(OrfRep == strain) + } + + df_strains <- df_strains %>% mutate(SM = 0) + + for (concentration in unique(df_strains$conc_num)) { + df_temp <- df_strains %>% filter(conc_num == concentration) + if (concentration > 0) { + max_l_theoretical <- df_stats_by_l_within_2sd_k %>% filter(conc_num_factor == concentration) %>% pull(max_L) + df_temp <- df_temp %>% + mutate( + L = ifelse(L == 0 & !is.na(L), max_L_theoretical, L), + SM = ifelse(L >= max_l_theoretical & !is.na(L), 1, SM), + L = ifelse(L >= max_l_theoretical & !is.na(L), max_l_theoretical, L) + ) + } + df_strains <- bind_rows(df_strains, df_temp) + } + + return(df_strains) +} + + + + +main <- function() { + # Applying to all experiments + lapply(names(args$experiments), function(exp_name) { + exp <- args$experiments[[exp_name]] + exp_path <- exp$path + exp_sd <- exp$sd + out_dir <- file.path(exp_path, "zscores") + out_dir_qc <- file.path(exp_path, "zscores", "qc") + dir.create(out_dir, recursive = TRUE, showWarnings = FALSE) + dir.create(out_dir_qc, recursive = TRUE, showWarnings = FALSE) + + # Load and process data + df <- load_and_process_data(args$easy_results_file, exp_sd) + df <- update_gene_names(df, args$sgd_gene_list) + + # Variables for analysis + variables <- c("L", "K", "r", "AUC", "delta_bg") + + # QC + # Filter the df above sd tolerance + df_above_tolerance <- df %>% filter(DB == 1) + + # Exclude variables above delta bg tolerance + df_na <- df_above_tolerance %>% + mutate( + L = ifelse(DB == 1, NA, L), + r = ifelse(DB == 1, NA, r), + AUC = ifelse(DB == 1, NA, AUC), + K = ifelse(DB == 1, NA, K) + ) + df_no_zeros <- df_na %>% filter(L > 0) + + # Generate PDFs and HTMLs + generate_and_save_plots(df, out_dir_qc, "Before_QC", variables, include_qc = TRUE) + generate_and_save_plots(df_above_tolerance, out_dir_qc, "Raw_L_vs_K_above_delta_bg_threshold", variables, include_qc = TRUE) + generate_and_save_plots(df_na, out_dir_qc, "After_QC", variables) + generate_and_save_plots(df_no_zeros, out_dir_qc, "No_Zeros", variables) + + # TODO We may be more cpu than memory contrained at this point, not sure + rm(df_no_zeros, df_above_tolerance) + + # Summary statistics + variables <- c("L", "K", "r", "AUC") + summary_stats <- calculate_summary_stats(df_na, variables, group_vars = c("conc_num", "conc_num_factor")) + write.csv(summary_stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE) + + # Calculate the pre-background stats once + df_stats <- calculate_summary_stats(df_na, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor")) + df_stats_by_l <- df_stats %>% select(starts_with("L_"), all_of(group_vars)) + df_stats_by_k <- df_stats %>% select(starts_with("K_"), all_of(group_vars)) + df_stats_by_r <- df_stats %>% select(starts_with("r_"), all_of(group_vars)) + df_stats_by_auc <- df_stats %>% select(starts_with("AUC_"), all_of(group_vars)) + + # Process background strains + background_strains <- c("YDL227C") + lapply(background_strains, function(strain) { + + # Handle missing data by setting zero values to NA + # and then removing any rows with NA in L col + df_background <- df_na %>% + filter(OrfRep == strain) %>% + mutate( + L = if_else(L == 0, NA, L), + K = if_else(K == 0, NA, K), + r = if_else(r == 0, NA, r), + AUC = if_else(AUC == 0, NA, AUC) + ) %>% + filter(!is.na(L)) + + # Recalculate summary statistics for the background strain + df_stats_background <- calculate_summary_stats(df_background, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor")) + df_stats_by_l_background <- df_stats_background %>% select(starts_with("L_"), all_of(group_vars)) + df_stats_by_k_background <- df_stats_background %>% select(starts_with("K_"), all_of(group_vars)) + df_stats_by_r_background <- df_stats_background %>% select(starts_with("r_"), all_of(group_vars)) + df_stats_by_auc_background <- df_stats_background %>% select(starts_with("AUC_"), all_of(group_vars)) + + # Backup in case previous block explodes + # Combine all summary statistics into one dataframe + # df_stats_background <- stats_l %>% + # left_join(stats_k, by = c("OrfRep", "conc_num", "conc_num_factor")) %>% + # left_join(stats_r, by = c("OrfRep", "conc_num", "conc_num_factor")) %>% + # left_join(stats_auc, by = c("OrfRep", "conc_num", "conc_num_factor")) + + # Save the summary statistics for the background strain + write.csv(df_stats_background, + file = file.path(output_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")), + row.names = FALSE) + + # Calculate L values within and outside 2SD of K + results_2sd <- calculate_l_2sd_of_k(df_background, df_stats_by_k_background) + df_within_2sd_k <- results_2sd$df_within_2sd_k + df_outside_2sd_k <- results_2sd$df_outside_2sd_k + + # Calculate summary statistics for L within and outside 2SD of K + df_stats_by_l_within_2sd_k <- calculate_summary_stats(df_within_2sd_k, "L") + write.csv(df_stats_by_l_within_2sd_k, + file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"), + row.names = FALSE) + + df_stats_by_l_outside_2sd_k <- calculate_summary_stats(df_outside_2sd_k, "L") + write.csv(df_stats_by_l_outside_2sd_k, + file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_outside_2sd_k.csv"), + row.names = FALSE) + + generate_and_save_plots(df_outside_2sd_k, out_dir, "Raw_L_vs_K_for_strains_outside_2sd_k") + + background_means <- calculate_background_means(df_stats_by_l_background, + df_stats_by_k_background, df_stats_by_r_background, df_stats_by_auc_background) + + df_deletion_strains <- process_strains(df_na, df_stats_by_l_within_2sd_k, strain, out_dir, "deletion") + write.csv(df_deletion_strains, + file = file.path(output_dir, paste0("Processed_", strain_type, "_strains_", strain, ".csv")), + row.names = FALSE) + + df_reference_strains <- process_strains(df_na, df_stats_by_l_within_2sd_k, strain, out_dir, "reference") + write.csv(df_strains, + file = file.path(output_dir, paste0("Processed_", strain_type, "_strains_", strain, ".csv")), + row.names = FALSE) + + # Further processing or calculations using df_deletion_strains and df_reference_strains + }) + }) +} +main() + + + + + + +for (s in background_strains) { + + + # Get the background strain mean values at the no-drug concentration (conc_num_factor = 0) + background_mean_l <- df_stats_by_l %>% filter(conc_num_factor == 0) %>% pull(mean_L) + background_mean_k <- df_stats_by_k %>% filter(conc_num_factor == 0) %>% pull(mean_K) + background_mean_r <- df_stats_by_r %>% filter(conc_num_factor == 0) %>% pull(mean_r) + background_mean_auc <- df_stats_by_auc %>% filter(conc_num_factor == 0) %>% pull(mean_AUC) + + # Initialize empty plots (placeholder for future plotting) + p_l <- ggplot() + p_k <- ggplot() + p_r <- ggplot() + p_auc <- ggplot() + p_rf_l <- ggplot() + p_rf_k <- ggplot() + p_rf_r <- ggplot() + p_rf_auc <- ggplot() + + # Get only the deletion strains (excluding the background strain) + df_deletion_strains <- df %>% filter(OrfRep != s) + + # Initialize SM (Set to Max) column + df_deletion_strains <- df_deletion_strains %>% mutate(SM = 0) + + # Set missing values to the highest theoretical value at each drug concentration for L + df_deletion_strains_new <- data.frame() + + for (i in seq_along(unique(df_deletion_strains$conc_num))) { + concentration <- unique(df_deletion_strains$conc_num)[i] + df_temp <- df_deletion_strains %>% filter(conc_num == concentration) + + if (concentration == 0) { + df_deletion_strains_new <- df_temp + message(paste("Check loop order, conc =", concentration)) + } else { + max_L_theoretical <- df_stats_by_l_within_2sd_k %>% filter(conc_num_factor == concentration) %>% pull(max_L) + + df_temp <- df_temp %>% + mutate( + L = ifelse(L == 0 & !is.na(L), max_L_theoretical, L), + SM = ifelse(L >= max_L_theoretical & !is.na(L), 1, SM), + L = ifelse(L >= max_L_theoretical & !is.na(L), max_L_theoretical, L) + ) + + df_deletion_strains_new <- bind_rows(df_deletion_strains_new, df_temp) + message(paste("Check loop order, conc =", concentration)) + } + } + + df_deletion_strains <- df_deletion_strains_new + + # Get only the reference strains (background strain) + df_reference_strains <- df %>% filter(OrfRep == s) + + # Initialize SM (Set to Max) column + df_reference_strains <- df_reference_strains %>% mutate(SM = 0) + + # Set missing values to the highest theoretical value at each drug concentration for L + df_reference_strains_new <- data.frame() + + for (i in seq_along(unique(df_reference_strains$conc_num))) { + concentration <- unique(df_reference_strains$conc_num)[i] + df_rf_temp <- df_reference_strains %>% filter(conc_num == concentration) + + if (concentration == 0) { + df_reference_strains_new <- df_rf_temp + message(paste("Check loop order, conc =", concentration)) + } else { + max_L_theoretical <- df_stats_by_l_within_2sd_k %>% filter(conc_num_factor == concentration) %>% pull(max_L) + + df_rf_temp <- df_rf_temp %>% + mutate( + L = ifelse(L == 0 & !is.na(L), max_L_theoretical, L), + SM = ifelse(L >= max_L_theoretical & !is.na(L), 1, SM), + L = ifelse(L >= max_L_theoretical & !is.na(L), max_L_theoretical, L) + ) + + df_reference_strains_new <- bind_rows(df_reference_strains_new, df_rf_temp) + message(paste("Check loop order, if error, refs have no L values outside theoretical max L, for REFs, conc =", concentration)) + } + } + + df_reference_strains <- df_reference_strains_new + + df_RF <- df_RF_new + + # Get the RF Z score values + + # Change the OrfRep column to include the RF strain, Gene name, and Num so each RF gets its own score + df_RF <- df_RF %>% + mutate(OrfRep = paste(OrfRep, Gene, Num, sep = "_")) + + num_genes_RF <- length(unique(df_RF$OrfRep)) + print(num_genes_RF) + + # Create the output dataframe containing columns for each RF strain + interaction_scores_RF <- df_RF %>% + distinct(OrfRep) %>% + mutate( + Gene = NA, + Raw_Shift_L = NA, Z_Shift_L = NA, lm_Score_L = NA, Z_lm_L = NA, R_Squared_L = NA, Sum_Z_Score_L = NA, Avg_Zscore_L = NA, + Raw_Shift_K = NA, Z_Shift_K = NA, lm_Score_K = NA, Z_lm_K = NA, R_Squared_K = NA, Sum_Z_Score_K = NA, Avg_Zscore_K = NA, + Raw_Shift_r = NA, Z_Shift_r = NA, lm_Score_r = NA, Z_lm_r = NA, R_Squared_r = NA, Sum_Z_Score_r = NA, Avg_Zscore_r = NA, + Raw_Shift_AUC = NA, Z_Shift_AUC = NA, lm_Score_AUC = NA, Z_lm_AUC = NA, R_Squared_AUC = NA, Sum_Z_Score_AUC = NA, Avg_Zscore_AUC = NA, + NG = NA, SM = NA + ) + + for (i in seq_len(num_genes_RF)) { + # Get each deletion strain ORF + gene_sel <- unique(df_RF$OrfRep)[i] + + # Extract only the current deletion strain and its data + df_gene_sel <- df_RF %>% filter(OrfRep == gene_sel) + + # Calculate summary statistics for the selected gene + df_stats_interaction <- df_gene_sel %>% + group_by(OrfRep, Gene, conc_num, conc_num_factor) %>% + summarise( + N = n(), + mean_L = mean(L, na.rm = TRUE), + median_L = median(L, na.rm = TRUE), + sd_L = sd(L, na.rm = TRUE), + se_L = sd_L / sqrt(N - 1), + mean_K = mean(K, na.rm = TRUE), + median_K = median(K, na.rm = TRUE), + sd_K = sd(K, na.rm = TRUE), + se_K = sd_K / sqrt(N - 1), + mean_r = mean(r, na.rm = TRUE), + median_r = median(r, na.rm = TRUE), + sd_r = sd(r, na.rm = TRUE), + se_r = sd_r / sqrt(N - 1), + mean_AUC = mean(AUC, na.rm = TRUE), + median_AUC = median(AUC, na.rm = TRUE), + sd_AUC = sd(AUC, na.rm = TRUE), + se_AUC = sd_AUC / sqrt(N - 1), + NG = sum(NG, na.rm = TRUE), + DB = sum(DB, na.rm = TRUE), + SM = sum(SM, na.rm = TRUE) + ) + + # Get shift values + if (is.na(df_stats_interaction$mean_L[1]) || df_stats_interaction$mean_L[1] == 0) { + df_stats_interaction <- df_stats_interaction %>% + mutate( + Raw_Shift_L = 0, Z_Shift_L = 0, + Raw_Shift_K = 0, Z_Shift_K = 0, + Raw_Shift_r = 0, Z_Shift_r = 0, + Raw_Shift_AUC = 0, Z_Shift_AUC = 0 + ) + } else { + df_stats_interaction <- df_stats_interaction %>% + mutate( + Raw_Shift_L = mean_L[1] - background_mean_l, + Z_Shift_L = Raw_Shift_L / df_stats_by_l$sd[1], + Raw_Shift_K = mean_K[1] - background_mean_k, + Z_Shift_K = Raw_Shift_K / df_stats_by_k$sd[1], + Raw_Shift_r = mean_r[1] - background_mean_r, + Z_Shift_r = Raw_Shift_r / df_stats_by_r$sd[1], + Raw_Shift_AUC = mean_AUC[1] - background_mean_auc, + Z_Shift_AUC = Raw_Shift_AUC / df_stats_by_auc$sd[1] + ) + } + + # Add wild-type (WT) values and standard deviations + df_stats_interaction <- df_stats_interaction %>% + mutate( + WT_l = df_stats_by_l$mean, WT_sd_l = df_stats_by_l$sd, + WT_K = df_stats_by_k$mean, WT_sd_K = df_stats_by_k$sd, + WT_r = df_stats_by_r$mean, WT_sd_r = df_stats_by_r$sd, + WT_AUC = df_stats_by_auc$mean, WT_sd_AUC = df_stats_by_auc$sd + ) + + # Calculate scores if there is growth at no drug concentration + if (df_stats_interaction$mean_L[1] != 0 && !is.na(df_stats_interaction$mean_L[1])) { + df_stats_interaction <- df_stats_interaction %>% + mutate( + Exp_L = WT_l + Raw_Shift_L, + Exp_K = WT_K + Raw_Shift_K, + Exp_r = WT_r + Raw_Shift_r, + Exp_AUC = WT_AUC + Raw_Shift_AUC, + Delta_L = mean_L - Exp_L, + Delta_K = mean_K - Exp_K, + Delta_r = mean_r - Exp_r, + Delta_AUC = mean_AUC - Exp_AUC, + Zscore_L = Delta_L / WT_sd_l, + Zscore_K = Delta_K / WT_sd_K, + Zscore_r = Delta_r / WT_sd_r, + Zscore_AUC = Delta_AUC / WT_sd_AUC + ) + + # Handle no growth (NG) and set to max (SM) values + if (sum(df_stats_interaction$NG, na.rm = TRUE) > 0) { + df_stats_interaction <- df_stats_interaction %>% + mutate( + Delta_L = ifelse(NG == 1, mean_L - WT_l, Delta_L), + Delta_K = ifelse(NG == 1, mean_K - WT_K, Delta_K), + Delta_r = ifelse(NG == 1, mean_r - WT_r, Delta_r), + Delta_AUC = ifelse(NG == 1, mean_AUC - WT_AUC, Delta_AUC) + ) + } + + if (sum(df_stats_interaction$SM, na.rm = TRUE) > 0) { + df_stats_interaction <- df_stats_interaction %>% + mutate(Delta_L = ifelse(SM == 1, mean_L - WT_l, Delta_L)) + } + + # Calculate linear models + gene_lm_L <- lm(Delta_L ~ conc_num_factor, data = df_stats_interaction) + gene_lm_K <- lm(Delta_K ~ conc_num_factor, data = df_stats_interaction) + gene_lm_r <- lm(Delta_r ~ conc_num_factor, data = df_stats_interaction) + gene_lm_AUC <- lm(Delta_AUC ~ conc_num_factor, data = df_stats_interaction) + + # Calculate interaction scores and R-squared values + gene_interaction_L <- max_conc * coef(gene_lm_L)[2] + coef(gene_lm_L)[1] + r_squared_l <- summary(gene_lm_L)$r.squared + gene_interaction_K <- max_conc * coef(gene_lm_K)[2] + coef(gene_lm_K)[1] + r_squared_K <- summary(gene_lm_K)$r.squared + gene_interaction_r <- max_conc * coef(gene_lm_r)[2] + coef(gene_lm_r)[1] + r_squared_r <- summary(gene_lm_r)$r.squared + gene_interaction_AUC <- max_conc * coef(gene_lm_AUC)[2] + coef(gene_lm_AUC)[1] + r_squared_AUC <- summary(gene_lm_AUC)$r.squared + + # Total non-removed concentrations + num_non_removed_conc <- Total_Conc_Nums - sum(df_stats_interaction$DB, na.rm = TRUE) - 1 + + # Report the scores + interaction_scores_RF <- interaction_scores_RF %>% + mutate( + Raw_Shift_L = replace(Raw_Shift_L, OrfRep == gene_sel, df_stats_interaction$Raw_Shift_L[1]), + Z_Shift_L = replace(Z_Shift_L, OrfRep == gene_sel, df_stats_interaction$Z_Shift_L[1]), + lm_Score_L = replace(lm_Score_L, OrfRep == gene_sel, gene_interaction_L), + R_Squared_L = replace(R_Squared_L, OrfRep == gene_sel, r_squared_l), + Sum_Z_Score_L = replace(Sum_Z_Score_L, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_L, na.rm = TRUE)), + Avg_Zscore_L = replace(Avg_Zscore_L, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_L, na.rm = TRUE) / num_non_removed_conc), + Raw_Shift_K = replace(Raw_Shift_K, OrfRep == gene_sel, df_stats_interaction$Raw_Shift_K[1]), + Z_Shift_K = replace(Z_Shift_K, OrfRep == gene_sel, df_stats_interaction$Z_Shift_K[1]), + lm_Score_K = replace(lm_Score_K, OrfRep == gene_sel, gene_interaction_K), + R_Squared_K = replace(R_Squared_K, OrfRep == gene_sel, r_squared_K), + Sum_Z_Score_K = replace(Sum_Z_Score_K, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_K, na.rm = TRUE)), + Avg_Zscore_K = replace(Avg_Zscore_K, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_K, na.rm = TRUE) / num_non_removed_conc), + Raw_Shift_r = replace(Raw_Shift_r, OrfRep == gene_sel, df_stats_interaction$Raw_Shift_r[1]), + Z_Shift_r = replace(Z_Shift_r, OrfRep == gene_sel, df_stats_interaction$Z_Shift_r[1]), + lm_Score_r = replace(lm_Score_r, OrfRep == gene_sel, gene_interaction_r), + R_Squared_r = replace(R_Squared_r, OrfRep == gene_sel, r_squared_r), + Sum_Z_Score_r = replace(Sum_Z_Score_r, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_r, na.rm = TRUE)), + Avg_Zscore_r = replace(Avg_Zscore_r, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_r, na.rm = TRUE) / num_non_removed_conc), + Raw_Shift_AUC = replace(Raw_Shift_AUC, OrfRep == gene_sel, df_stats_interaction$Raw_Shift_AUC[1]), + Z_Shift_AUC = replace(Z_Shift_AUC, OrfRep == gene_sel, df_stats_interaction$Z_Shift_AUC[1]), + lm_Score_AUC = replace(lm_Score_AUC, OrfRep == gene_sel, gene_interaction_AUC), + R_Squared_AUC = replace(R_Squared_AUC, OrfRep == gene_sel, r_squared_AUC), + Sum_Z_Score_AUC = replace(Sum_Z_Score_AUC, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_AUC, na.rm = TRUE)), + Avg_Zscore_AUC = replace(Avg_Zscore_AUC, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_AUC, na.rm = TRUE) / num_non_removed_conc) + ) + } else { + # Handle case where mean_L is 0 or NA + interaction_scores_RF <- interaction_scores_RF %>% + mutate( + Raw_Shift_L = replace(Raw_Shift_L, OrfRep == gene_sel, NA), + Z_Shift_L = replace(Z_Shift_L, OrfRep == gene_sel, NA), + lm_Score_L = replace(lm_Score_L, OrfRep == gene_sel, NA), + R_Squared_L = replace(R_Squared_L, OrfRep == gene_sel, NA), + Sum_Z_Score_L = replace(Sum_Z_Score_L, OrfRep == gene_sel, NA), + Avg_Zscore_L = replace(Avg_Zscore_L, OrfRep == gene_sel, NA), + Raw_Shift_K = replace(Raw_Shift_K, OrfRep == gene_sel, NA), + Z_Shift_K = replace(Z_Shift_K, OrfRep == gene_sel, NA), + lm_Score_K = replace(lm_Score_K, OrfRep == gene_sel, NA), + R_Squared_K = replace(R_Squared_K, OrfRep == gene_sel, NA), + Sum_Z_Score_K = replace(Sum_Z_Score_K, OrfRep == gene_sel, NA), + Avg_Zscore_K = replace(Avg_Zscore_K, OrfRep == gene_sel, NA), + Raw_Shift_r = replace(Raw_Shift_r, OrfRep == gene_sel, NA), + Z_Shift_r = replace(Z_Shift_r, OrfRep == gene_sel, NA), + lm_Score_r = replace(lm_Score_r, OrfRep == gene_sel, NA), + R_Squared_r = replace(R_Squared_r, OrfRep == gene_sel, NA), + Sum_Z_Score_r = replace(Sum_Z_Score_r, OrfRep == gene_sel, NA), + Avg_Zscore_r = replace(Avg_Zscore_r, OrfRep == gene_sel, NA), + Raw_Shift_AUC = replace(Raw_Shift_AUC, OrfRep == gene_sel, NA), + Z_Shift_AUC = replace(Z_Shift_AUC, OrfRep == gene_sel, NA), + lm_Score_AUC = replace(lm_Score_AUC, OrfRep == gene_sel, NA), + R_Squared_AUC = replace(R_Squared_AUC, OrfRep == gene_sel, NA), + Sum_Z_Score_AUC = replace(Sum_Z_Score_AUC, OrfRep == gene_sel, NA), + Avg_Zscore_AUC = replace(Avg_Zscore_AUC, OrfRep == gene_sel, NA) + ) + } + + # Append the interaction statistics for all RFs + if (i == 1) { + df_stats_interaction_all_RF <- df_stats_interaction + } else { + df_stats_interaction_all_RF <- bind_rows(df_stats_interaction_all_RF, df_stats_interaction) + } + + # Add NG, DB, and SM values to the InteractionScores_RF dataframe + interaction_scores_RF <- interaction_scores_RF %>% + mutate( + NG = replace(NG, OrfRep == gene_sel, sum(df_stats_interaction$NG, na.rm = TRUE)), + DB = replace(DB, OrfRep == gene_sel, sum(df_stats_interaction$DB, na.rm = TRUE)), + SM = replace(SM, OrfRep == gene_sel, sum(df_stats_interaction$SM, na.rm = TRUE)) + ) + } + + print("Pass RF Calculation loop") + + # Calculate summary statistics for the linear models + lm_sd_L <- sd(interaction_scores_RF$lm_Score_L, na.rm = TRUE) + lm_sd_K <- sd(interaction_scores_RF$lm_Score_K, na.rm = TRUE) + lm_sd_r <- sd(interaction_scores_RF$lm_Score_r, na.rm = TRUE) + lm_sd_AUC <- sd(interaction_scores_RF$lm_Score_AUC, na.rm = TRUE) + + lm_mean_L <- mean(interaction_scores_RF$lm_Score_L, na.rm = TRUE) + lm_mean_K <- mean(interaction_scores_RF$lm_Score_K, na.rm = TRUE) + lm_mean_r <- mean(interaction_scores_RF$lm_Score_r, na.rm = TRUE) + lm_mean_AUC <- mean(interaction_scores_RF$lm_Score_AUC, na.rm = TRUE) + + print(paste("Mean RF linear regression score L:", lm_mean_L)) + + # Calculate Z scores for the linear models + interaction_scores_RF <- interaction_scores_RF %>% + mutate( + Z_lm_L = (lm_Score_L - lm_mean_L) / lm_sd_L, + Z_lm_K = (lm_Score_K - lm_mean_K) / lm_sd_K, + Z_lm_r = (lm_Score_r - lm_mean_r) / lm_sd_r, + Z_lm_AUC = (lm_Score_AUC - lm_mean_AUC) / lm_sd_AUC + ) + + # Sort the dataframe by Z_lm_L and NG (No Growth) + interaction_scores_RF <- interaction_scores_RF %>% + arrange(desc(Z_lm_L), desc(NG)) + + write.csv(interaction_scores_RF, file = file.path(output_dir, "RF_ZScores_Interaction.csv"), row.names = FALSE) + + # Generate ggplot objects for each RF strain + for (i in seq_len(num_genes_RF)) { + gene_sel <- unique(interaction_scores_RF$OrfRep)[i] + df_z_calculations <- df_stats_interaction_all_RF %>% filter(OrfRep == gene_sel) + df_int_scores <- interaction_scores_RF %>% filter(OrfRep == gene_sel) + + p_rf_l[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_L)) + + geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + + coord_cartesian(ylim = c(-65, 65)) + + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_l), ymax = 0 + (2 * WT_sd_l)), alpha = 0.3) + + ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) + + annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$Z_Shift_L, 2))) + + annotate("text", x = 1, y = 25, label = paste("lm Zscore =", round(df_int_scores$Z_lm_L, 2))) + + annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) + + annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) + + annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) + + scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor), labels = unique(as.character(df_z_calculations$conc_num))) + + scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) + + theme_publication() + + p_rf_k[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_K)) + + geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + + coord_cartesian(ylim = c(-65, 65)) + + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_K), ymax = 0 + (2 * WT_sd_K)), alpha = 0.3) + + ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) + + annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$Z_Shift_K, 2))) + + annotate("text", x = 1, y = 25, label = paste("lm Zscore =", round(df_int_scores$Z_lm_K, 2))) + + annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) + + annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) + + annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) + + scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor), labels = unique(as.character(df_z_calculations$conc_num))) + + scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) + + theme_publication() + + p_rf_r[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_r)) + + geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + + coord_cartesian(ylim = c(-0.65, 0.65)) + + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_r), ymax = 0 + (2 * WT_sd_r)), alpha = 0.3) + + ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) + + annotate("text", x = 1, y = 0.45, label = paste("ZShift =", round(df_int_scores$Z_Shift_r, 2))) + + annotate("text", x = 1, y = 0.25, label = paste("lm Zscore =", round(df_int_scores$Z_lm_r, 2))) + + annotate("text", x = 1, y = -0.25, label = paste("NG =", df_int_scores$NG)) + + annotate("text", x = 1, y = -0.35, label = paste("DB =", df_int_scores$DB)) + + annotate("text", x = 1, y = -0.45, label = paste("SM =", df_int_scores$SM)) + + scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor), labels = unique(as.character(df_z_calculations$conc_num))) + + scale_y_continuous(breaks = c(-0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6)) + + theme_publication() + + p_rf_auc[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_AUC)) + + geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + + coord_cartesian(ylim = c(-6500, 6500)) + + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_AUC), ymax = 0 + (2 * WT_sd_AUC)), alpha = 0.3) + + ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) + + annotate("text", x = 1, y = 4500, label = paste("ZShift =", round(df_int_scores$Z_Shift_AUC, 2))) + + annotate("text", x = 1, y = 2500, label = paste("lm Zscore =", round(df_int_scores$Z_lm_AUC, 2))) + + annotate("text", x = 1, y = -2500, label = paste("NG =", df_int_scores$NG)) + + annotate("text", x = 1, y = -3500, label = paste("DB =", df_int_scores$DB)) + + annotate("text", x = 1, y = -4500, label = paste("SM =", df_int_scores$SM)) + + scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor), labels = unique(as.character(df_z_calculations$conc_num))) + + scale_y_continuous(breaks = c(-6000, -5000, -4000, -3000, -2000, -1000, 0, 1000, 2000, 3000, 4000, 5000, 6000)) + + theme_publication() + + # Append the final interaction statistics for all RFs + if (i == 1) { + df_stats_interaction_all_RF_final <- df_z_calculations + } else { + df_stats_interaction_all_RF_final <- bind_rows(df_stats_interaction_all_RF_final, df_z_calculations) + } + } + + print("Pass RF ggplot loop") + + # Save the final interaction statistics + write.csv(df_stats_interaction_all_RF_final, file = file.path(output_dir, "RF_ZScore_Calculations.csv"), row.names = FALSE) + + ###### Part 5 - Get Z-scores for Gene Deletion Strains + + # Get the total number of genes for the loop + num_genes <- length(unique(df_deletion$OrfRep)) + print(num_genes) + + # Create the output dataframe containing columns for each deletion strain + interaction_scores_deletion <- unique(df_deletion["OrfRep"]) + interaction_scores_deletion <- interaction_scores_deletion %>% + mutate( + Gene = NA, + Raw_Shift_L = NA, Z_Shift_L = NA, lm_Score_L = NA, Z_lm_L = NA, R_Squared_L = NA, + Sum_Z_Score_L = NA, Avg_Zscore_L = NA, + Raw_Shift_K = NA, Z_Shift_K = NA, lm_Score_K = NA, Z_lm_K = NA, R_Squared_K = NA, + Sum_Z_Score_K = NA, Avg_Zscore_K = NA, + Raw_Shift_r = NA, Z_Shift_r = NA, lm_Score_r = NA, Z_lm_r = NA, R_Squared_r = NA, + Sum_Z_Score_r = NA, Avg_Zscore_r = NA, + Raw_Shift_AUC = NA, Z_Shift_AUC = NA, lm_Score_AUC = NA, Z_lm_AUC = NA, R_Squared_AUC = NA, + Sum_Z_Score_AUC = NA, Avg_Zscore_AUC = NA, + NG = NA, DB = NA, SM = NA + ) + + for (i in seq_len(num_genes)) { + gene_sel <- unique(df_deletion$OrfRep)[i] + df_gene_sel <- df_deletion %>% filter(OrfRep == gene_sel) + + df_stats_interaction <- df_gene_sel %>% + group_by(OrfRep, Gene, conc_num, conc_num_factor) %>% + summarise( + N = length(L), mean_L = mean(L, na.rm = TRUE), median_L = median(L, na.rm = TRUE), + sd_L = sd(L, na.rm = TRUE), se_L = sd_L / sqrt(N - 1), + mean_K = mean(K, na.rm = TRUE), median_K = median(K, na.rm = TRUE), + sd_K = sd(K, na.rm = TRUE), se_K = sd_K / sqrt(N - 1), + mean_r = mean(r, na.rm = TRUE), median_r = median(r, na.rm = TRUE), + sd_r = sd(r, na.rm = TRUE), se_r = sd_r / sqrt(N - 1), + mean_AUC = mean(AUC, na.rm = TRUE), median_AUC = median(AUC, na.rm = TRUE), + sd_AUC = sd(AUC, na.rm = TRUE), se_AUC = sd_AUC / sqrt(N - 1), + NG = sum(NG, na.rm = TRUE), DB = sum(DB, na.rm = TRUE), SM = sum(SM, na.rm = TRUE) + ) %>% + ungroup() + + if (is.na(df_stats_interaction$mean_L[1]) || df_stats_interaction$mean_L[1] == 0) { + df_stats_interaction <- df_stats_interaction %>% + mutate( + Raw_Shift_L = 0, Raw_Shift_K = 0, Raw_Shift_r = 0, Raw_Shift_AUC = 0, + Z_Shift_L = 0, Z_Shift_K = 0, Z_Shift_r = 0, Z_Shift_AUC = 0 + ) + } else { + df_stats_interaction <- df_stats_interaction %>% + mutate( + Raw_Shift_L = mean_L[1] - background_L, Raw_Shift_K = mean_K[1] - background_K, + Raw_Shift_r = mean_r[1] - background_r, Raw_Shift_AUC = mean_AUC[1] - background_AUC, + Z_Shift_L = Raw_Shift_L[1] / df_stats_BY_L$sd[1], + Z_Shift_K = Raw_Shift_K[1] / df_stats_BY_K$sd[1], + Z_Shift_r = Raw_Shift_r[1] / df_stats_BY_r$sd[1], + Z_Shift_AUC = Raw_Shift_AUC[1] / df_stats_BY_AUC$sd[1] + ) + } + + df_stats_interaction <- df_stats_interaction %>% + mutate( + WT_l = df_stats_BY_L$mean, WT_K = df_stats_BY_K$mean, + WT_r = df_stats_BY_r$mean, WT_AUC = df_stats_BY_AUC$mean, + WT_sd_l = df_stats_BY_L$sd, WT_sd_K = df_stats_BY_K$sd, + WT_sd_r = df_stats_BY_r$sd, WT_sd_AUC = df_stats_BY_AUC$sd + ) + + if (df_stats_interaction$mean_L[1] != 0 && !is.na(df_stats_interaction$mean_L[1])) { + df_stats_interaction <- df_stats_interaction %>% + mutate( + Exp_L = WT_l + Raw_Shift_L, Exp_K = WT_K + Raw_Shift_K, + Exp_r = WT_r + Raw_Shift_r, Exp_AUC = WT_AUC + Raw_Shift_AUC, + Delta_L = mean_L - Exp_L, Delta_K = mean_K - Exp_K, + Delta_r = mean_r - Exp_r, Delta_AUC = mean_AUC - Exp_AUC + ) + + if (sum(df_stats_interaction$NG, na.rm = TRUE) > 0) { + df_stats_interaction <- df_stats_interaction %>% + mutate( + Delta_L = if_else(NG == 1, mean_L - WT_l, Delta_L), + Delta_K = if_else(NG == 1, mean_K - WT_K, Delta_K), + Delta_r = if_else(NG == 1, mean_r - WT_r, Delta_r), + Delta_AUC = if_else(NG == 1, mean_AUC - WT_AUC, Delta_AUC) + ) + } + + if (sum(df_stats_interaction$SM, na.rm = TRUE) > 0) { + df_stats_interaction <- df_stats_interaction %>% + mutate(Delta_L = if_else(SM == 1, mean_L - WT_l, Delta_L)) + } + + df_stats_interaction <- df_stats_interaction %>% + mutate( + Zscore_L = Delta_L / WT_sd_l, Zscore_K = Delta_K / WT_sd_K, + Zscore_r = Delta_r / WT_sd_r, Zscore_AUC = Delta_AUC / WT_sd_AUC + ) + + gene_lm_L <- lm(Delta_L ~ conc_num_factor, data = df_stats_interaction) + gene_lm_K <- lm(Delta_K ~ conc_num_factor, data = df_stats_interaction) + gene_lm_r <- lm(Delta_r ~ conc_num_factor, data = df_stats_interaction) + gene_lm_AUC <- lm(Delta_AUC ~ conc_num_factor, data = df_stats_interaction) + + gene_interaction_L <- max_conc * coef(gene_lm_L)[2] + coef(gene_lm_L)[1] + gene_interaction_K <- max_conc * coef(gene_lm_K)[2] + coef(gene_lm_K)[1] + gene_interaction_r <- max_conc * coef(gene_lm_r)[2] + coef(gene_lm_r)[1] + gene_interaction_AUC <- max_conc * coef(gene_lm_AUC)[2] + coef(gene_lm_AUC)[1] + + r_squared_l <- summary(gene_lm_L)$r.squared + r_squared_K <- summary(gene_lm_K)$r.squared + r_squared_r <- summary(gene_lm_r)$r.squared + r_squared_AUC <- summary(gene_lm_AUC)$r.squared + + num_non_removed_conc <- total_conc_nums - sum(df_stats_interaction$DB, na.rm = TRUE) - 1 + + interaction_scores_deletion <- interaction_scores_deletion %>% + mutate( + Gene = replace(Gene, OrfRep == gene_sel, df_gene_sel$Gene[1]), + Raw_Shift_L = replace(Raw_Shift_L, OrfRep == gene_sel, df_stats_interaction$Raw_Shift_L[1]), + Z_Shift_L = replace(Z_Shift_L, OrfRep == gene_sel, df_stats_interaction$Z_Shift_L[1]), + lm_Score_L = replace(lm_Score_L, OrfRep == gene_sel, gene_interaction_L), + Z_lm_L = replace(Z_lm_L, OrfRep == gene_sel, (gene_interaction_L - lm_mean_L) / lm_sd_L), + R_Squared_L = replace(R_Squared_L, OrfRep == gene_sel, r_squared_l), + Sum_Z_Score_L = replace(Sum_Z_Score_L, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_L, na.rm = TRUE)), + Avg_Zscore_L = replace(Avg_Zscore_L, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_L, na.rm = TRUE) / num_non_removed_conc), + Raw_Shift_K = replace(Raw_Shift_K, OrfRep == gene_sel, df_stats_interaction$Raw_Shift_K[1]), + Z_Shift_K = replace(Z_Shift_K, OrfRep == gene_sel, df_stats_interaction$Z_Shift_K[1]), + lm_Score_K = replace(lm_Score_K, OrfRep == gene_sel, gene_interaction_K), + Z_lm_K = replace(Z_lm_K, OrfRep == gene_sel, (gene_interaction_K - lm_mean_K) / lm_sd_K), + R_Squared_K = replace(R_Squared_K, OrfRep == gene_sel, r_squared_K), + Sum_Z_Score_K = replace(Sum_Z_Score_K, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_K, na.rm = TRUE)), + Avg_Zscore_K = replace(Avg_Zscore_K, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_K, na.rm = TRUE) / num_non_removed_conc), + Raw_Shift_r = replace(Raw_Shift_r, OrfRep == gene_sel, df_stats_interaction$Raw_Shift_r[1]), + Z_Shift_r = replace(Z_Shift_r, OrfRep == gene_sel, df_stats_interaction$Z_Shift_r[1]), + lm_Score_r = replace(lm_Score_r, OrfRep == gene_sel, gene_interaction_r), + Z_lm_r = replace(Z_lm_r, OrfRep == gene_sel, (gene_interaction_r - lm_mean_r) / lm_sd_r), + R_Squared_r = replace(R_Squared_r, OrfRep == gene_sel, r_squared_r), + Sum_Z_Score_r = replace(Sum_Z_Score_r, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_r, na.rm = TRUE)), + Avg_Zscore_r = replace(Avg_Zscore_r, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_r, na.rm = TRUE) / (total_conc_nums - 1)), + Raw_Shift_AUC = replace(Raw_Shift_AUC, OrfRep == gene_sel, df_stats_interaction$Raw_Shift_AUC[1]), + Z_Shift_AUC = replace(Z_Shift_AUC, OrfRep == gene_sel, df_stats_interaction$Z_Shift_AUC[1]), + lm_Score_AUC = replace(lm_Score_AUC, OrfRep == gene_sel, gene_interaction_AUC), + Z_lm_AUC = replace(Z_lm_AUC, OrfRep == gene_sel, (gene_interaction_AUC - lm_mean_AUC) / lm_sd_AUC), + R_Squared_AUC = replace(R_Squared_AUC, OrfRep == gene_sel, r_squared_AUC), + Sum_Z_Score_AUC = replace(Sum_Z_Score_AUC, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_AUC, na.rm = TRUE)), + Avg_Zscore_AUC = replace(Avg_Zscore_AUC, OrfRep == gene_sel, sum(df_stats_interaction$Zscore_AUC, na.rm = TRUE) / (total_conc_nums - 1)) + ) + } else { + # Similar logic for when mean_L is 0 or NA, setting relevant variables to NA or appropriate values + interaction_scores_deletion <- interaction_scores_deletion %>% + mutate( + Gene = replace(Gene, OrfRep == gene_sel, df_gene_sel$Gene[1]), + Raw_Shift_L = replace(Raw_Shift_L, OrfRep == gene_sel, NA), + Z_Shift_L = replace(Z_Shift_L, OrfRep == gene_sel, NA), + lm_Score_L = replace(lm_Score_L, OrfRep == gene_sel, NA), + Z_lm_L = replace(Z_lm_L, OrfRep == gene_sel, NA), + R_Squared_L = replace(R_Squared_L, OrfRep == gene_sel, NA), + Sum_Z_Score_L = replace(Sum_Z_Score_L, OrfRep == gene_sel, NA), + Avg_Zscore_L = replace(Avg_Zscore_L, OrfRep == gene_sel, NA), + Raw_Shift_K = replace(Raw_Shift_K, OrfRep == gene_sel, NA), + Z_Shift_K = replace(Z_Shift_K, OrfRep == gene_sel, NA), + lm_Score_K = replace(lm_Score_K, OrfRep == gene_sel, NA), + Z_lm_K = replace(Z_lm_K, OrfRep == gene_sel, NA), + R_Squared_K = replace(R_Squared_K, OrfRep == gene_sel, NA), + Sum_Z_Score_K = replace(Sum_Z_Score_K, OrfRep == gene_sel, NA), + Avg_Zscore_K = replace(Avg_Zscore_K, OrfRep == gene_sel, NA), + Raw_Shift_r = replace(Raw_Shift_r, OrfRep == gene_sel, NA), + Z_Shift_r = replace(Z_Shift_r, OrfRep == gene_sel, NA), + lm_Score_r = replace(lm_Score_r, OrfRep == gene_sel, NA), + Z_lm_r = replace(Z_lm_r, OrfRep == gene_sel, NA), + R_Squared_r = replace(R_Squared_r, OrfRep == gene_sel, NA), + Sum_Z_Score_r = replace(Sum_Z_Score_r, OrfRep == gene_sel, NA), + Avg_Zscore_r = replace(Avg_Zscore_r, OrfRep == gene_sel, NA), + Raw_Shift_AUC = replace(Raw_Shift_AUC, OrfRep == gene_sel, NA), + Z_Shift_AUC = replace(Z_Shift_AUC, OrfRep == gene_sel, NA), + lm_Score_AUC = replace(lm_Score_AUC, OrfRep == gene_sel, NA), + Z_lm_AUC = replace(Z_lm_AUC, OrfRep == gene_sel, NA), + R_Squared_AUC = replace(R_Squared_AUC, OrfRep == gene_sel, NA), + Sum_Z_Score_AUC = replace(Sum_Z_Score_AUC, OrfRep == gene_sel, NA), + Avg_Zscore_AUC = replace(Avg_Zscore_AUC, OrfRep == gene_sel, NA) + ) + } + + if (i == 1) { + df_stats_interaction_all <- df_stats_interaction + } else { + df_stats_interaction_all <- bind_rows(df_stats_interaction_all, df_stats_interaction) + } + + interaction_scores_deletion <- interaction_scores_deletion %>% + mutate( + NG = replace(NG, OrfRep == gene_sel, sum(df_stats_interaction$NG, na.rm = TRUE)), + DB = replace(DB, OrfRep == gene_sel, sum(df_stats_interaction$DB, na.rm = TRUE)), + SM = replace(SM, OrfRep == gene_sel, sum(df_stats_interaction$SM, na.rm = TRUE)) + ) + } + + print("Pass Int Calculation loop") + + # Order the interaction scores by Z_lm_L and NG + interaction_scores_deletion <- interaction_scores_deletion %>% + arrange(desc(Z_lm_L)) %>% + arrange(desc(NG)) + + # Save the interaction scores and filtered sets for enhancers and suppressors + output_files <- list( + "ZScores_Interaction.csv" = interaction_scores_deletion, + "ZScores_Interaction_DeletionEnhancers_L.csv" = filter(interaction_scores_deletion, Avg_Zscore_L >= 2), + "ZScores_Interaction_DeletionEnhancers_K.csv" = filter(interaction_scores_deletion, Avg_Zscore_K <= -2), + "ZScores_Interaction_DeletionSuppressors_L.csv" = filter(interaction_scores_deletion, Avg_Zscore_L <= -2), + "ZScores_Interaction_DeletionSuppressors_K.csv" = filter(interaction_scores_deletion, Avg_Zscore_K >= 2), + "ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv" = filter(interaction_scores_deletion, Avg_Zscore_L >= 2 | Avg_Zscore_L <= -2), + "ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv" = filter(interaction_scores_deletion, Avg_Zscore_K >= 2 | Avg_Zscore_K <= -2), + "ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv" = filter(interaction_scores_deletion, Z_lm_L >= 2 & Avg_Zscore_L <= -2), + "ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv" = filter(interaction_scores_deletion, Z_lm_L <= -2 & Avg_Zscore_L >= 2), + "ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv" = filter(interaction_scores_deletion, Z_lm_K <= -2 & Avg_Zscore_K >= 2), + "ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv" = filter(interaction_scores_deletion, Z_lm_K >= 2 & Avg_Zscore_K <= -2) + ) + + for (file_name in names(output_files)) { + write.csv(output_files[[file_name]], file = file.path(output_dir, file_name), row.names = FALSE) + } + + # Further filtering for linear regression enhancers and suppressors + output_files_lm <- list( + "ZScores_Interaction_DeletionEnhancers_L_lm.csv" = filter(interaction_scores_deletion, Z_lm_L >= 2), + "ZScores_Interaction_DeletionEnhancers_K_lm.csv" = filter(interaction_scores_deletion, Z_lm_K <= -2), + "ZScores_Interaction_DeletionSuppressors_L_lm.csv" = filter(interaction_scores_deletion, Z_lm_L <= -2), + "ZScores_Interaction_DeletionSuppressors_K_lm.csv" = filter(interaction_scores_deletion, Z_lm_K >= 2), + "ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv" = filter(interaction_scores_deletion, Z_lm_L >= 2 | Z_lm_L <= -2), + "ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv" = filter(interaction_scores_deletion, Z_lm_K >= 2 | Z_lm_K <= -2) + ) + + for (file_name in names(output_files_lm)) { + write.csv(output_files_lm[[file_name]], file = file.path(output_dir, file_name), row.names = FALSE) + } + + + # Loop through each gene to generate plots + for (i in 1:num_genes) { + gene_sel <- unique(interaction_scores_deletion$OrfRep)[i] + df_z_calculations <- df_stats_interaction_all %>% filter(OrfRep == gene_sel) + df_int_scores <- interaction_scores_deletion %>% filter(OrfRep == gene_sel) + + p_l[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_L)) + + geom_point() + + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + + coord_cartesian(ylim = c(-65, 65)) + + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_l), ymax = 0 + (2 * WT_sd_l)), alpha = 0.3) + + ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) + + annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$Z_Shift_L, 2))) + + annotate("text", x = 1, y = 25, label = paste("Z lm Score =", round(df_int_scores$Z_lm_L, 2))) + + annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) + + annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) + + annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) + + scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor), labels = unique(as.character(df_z_calculations$conc_num))) + + scale_y_continuous(breaks = seq(-60, 60, 10)) + + theme_Publication() + + p_k[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_K)) + + geom_point() + + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + + coord_cartesian(ylim = c(-65, 65)) + + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_K), ymax = 0 + (2 * WT_sd_K)), alpha = 0.3) + + ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) + + annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$Z_Shift_K, 2))) + + annotate("text", x = 1, y = 25, label = paste("Z lm Score =", round(df_int_scores$Z_lm_K, 2))) + + annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) + + annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) + + annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) + + scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor), labels = unique(as.character(df_z_calculations$conc_num))) + + scale_y_continuous(breaks = seq(-60, 60, 10)) + + theme_Publication() + + p_r[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_r)) + + geom_point() + + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + + coord_cartesian(ylim = c(-0.65, 0.65)) + + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_r), ymax = 0 + (2 * WT_sd_r)), alpha = 0.3) + + ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) + + annotate("text", x = 1, y = 0.45, label = paste("ZShift =", round(df_int_scores$Z_Shift_r, 2))) + + annotate("text", x = 1, y = 0.25, label = paste("Z lm Score =", round(df_int_scores$Z_lm_r, 2))) + + annotate("text", x = 1, y = -0.25, label = paste("NG =", df_int_scores$NG)) + + annotate("text", x = 1, y = -0.35, label = paste("DB =", df_int_scores$DB)) + + annotate("text", x = 1, y = -0.45, label = paste("SM =", df_int_scores$SM)) + + scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor), labels = unique(as.character(df_z_calculations$conc_num))) + + scale_y_continuous(breaks = seq(-0.6, 0.6, 0.2)) + + theme_Publication() + + p_auc[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_AUC)) + + geom_point() + + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) + + coord_cartesian(ylim = c(-6500, 6500)) + + geom_errorbar(aes(ymin = 0 - (2 * WT_sd_AUC), ymax = 0 + (2 * WT_sd_AUC)), alpha = 0.3) + + ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) + + annotate("text", x = 1, y = 4500, label = paste("ZShift =", round(df_int_scores$Z_Shift_AUC, 2))) + + annotate("text", x = 1, y = 2500, label = paste("Z lm Score =", round(df_int_scores$Z_lm_AUC, 2))) + + annotate("text", x = 1, y = -2500, label = paste("NG =", df_int_scores$NG)) + + annotate("text", x = 1, y = -3500, label = paste("DB =", df_int_scores$DB)) + + annotate("text", x = 1, y = -4500, label = paste("SM =", df_int_scores$SM)) + + scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor), labels = unique(as.character(df_z_calculations$conc_num))) + + scale_y_continuous(breaks = seq(-6000, 6000, 1000)) + + theme_Publication() + + if (i == 1) { + df_stats_interaction_all_final <- df_z_calculations + } else { + df_stats_interaction_all_final <- bind_rows(df_stats_interaction_all_final, df_z_calculations) + } + } + + print("Pass Int ggplot loop") + write.csv(df_stats_interaction_all_final, file = file.path(output_dir, "ZScore_Calculations.csv"), row.names = FALSE) + + # Generate a blank plot for alignment purposes + blank_plot <- ggplot(df2_rf) + geom_blank() + + # Create PDF for interaction plots + pdf(file.path(output_dir, "InteractionPlots.pdf"), width = 16, height = 16, onefile = TRUE) + + # Summarize stats for X2_RF + df_stats_rf <- df2_rf %>% + group_by(conc_num, conc_num_factor) %>% + summarise( + mean_L = mean(L, na.rm = TRUE), + median_L = median(L, na.rm = TRUE), + max_L = max(L, na.rm = TRUE), + min_L = min(L, na.rm = TRUE), + sd_L = sd(L, na.rm = TRUE), + mean_K = mean(K, na.rm = TRUE), + median_K = median(K, na.rm = TRUE), + max_K = max(K, na.rm = TRUE), + min_K = min(K, na.rm = TRUE), + sd_K = sd(K, na.rm = TRUE), + mean_r = mean(r, na.rm = TRUE), + median_r = median(r, na.rm = TRUE), + max_r = max(r, na.rm = TRUE), + min_r = min(r, na.rm = TRUE), + sd_r = sd(r, na.rm = TRUE), + mean_AUC = mean(AUC, na.rm = TRUE), + median_AUC = median(AUC, na.rm = TRUE), + max_AUC = max(AUC, na.rm = TRUE), + min_AUC = min(AUC, na.rm = TRUE), + sd_AUC = sd(AUC, na.rm = TRUE), + NG = sum(NG, na.rm = TRUE), + DB = sum(DB, na.rm = TRUE), + SM = sum(SM, na.rm = TRUE) + ) + + # Create L statistics scatter plot + plot_l_stats <- ggplot(df2_rf, aes(conc_num_factor, L)) + + geom_point(position = "jitter", size = 1) + + stat_summary( + fun = mean, + fun.min = ~ mean(.) - sd(.), + fun.max = ~ mean(.) + sd(.), + geom = "errorbar", color = "red" + ) + + stat_summary(fun = mean, geom = "point", color = "red") + + scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + + ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) + + coord_cartesian(ylim = c(0, 160)) + + annotate("text", x = -0.25, y = 10, label = "NG") + + annotate("text", x = -0.25, y = 5, label = "DB") + + annotate("text", x = -0.25, y = 0, label = "SM") + + annotate("text", x = unique(df2_rf$conc_num_factor), y = 10, label = df_stats_rf$NG) + + annotate("text", x = unique(df2_rf$conc_num_factor), y = 5, label = df_stats_rf$DB) + + annotate("text", x = unique(df2_rf$conc_num_factor), y = 0, label = df_stats_rf$SM) + + theme_Publication() + + # Create K statistics scatter plot + plot_k_stats <- ggplot(df2_rf, aes(conc_num_factor, K)) + + geom_point(position = "jitter", size = 1) + + stat_summary( + fun = mean, + fun.min = ~ mean(.) - sd(.), + fun.max = ~ mean(.) + sd(.), + geom = "errorbar", color = "red" + ) + + stat_summary(fun = mean, geom = "point", color = "red") + + scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + + ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) + + coord_cartesian(ylim = c(-20, 160)) + + annotate("text", x = -0.25, y = -5, label = "NG") + + annotate("text", x = -0.25, y = -12.5, label = "DB") + + annotate("text", x = -0.25, y = -20, label = "SM") + + annotate("text", x = unique(df2_rf$conc_num_factor), y = -5, label = df_stats_rf$NG) + + annotate("text", x = unique(df2_rf$conc_num_factor), y = -12.5, label = df_stats_rf$DB) + + annotate("text", x = unique(df2_rf$conc_num_factor), y = -20, label = df_stats_rf$SM) + + theme_Publication() + + # Create r statistics scatter plot + plot_r_stats <- ggplot(df2_rf, aes(conc_num_factor, r)) + + geom_point(position = "jitter", size = 1) + + stat_summary( + fun = mean, + fun.min = ~ mean(.) - sd(.), + fun.max = ~ mean(.) + sd(.), + geom = "errorbar", color = "red" + ) + + stat_summary(fun = mean, geom = "point", color = "red") + + scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + + ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) + + coord_cartesian(ylim = c(0, 1)) + + annotate("text", x = -0.25, y = .9, label = "NG") + + annotate("text", x = -0.25, y = .8, label = "DB") + + annotate("text", x = -0.25, y = .7, label = "SM") + + annotate("text", x = unique(df2_rf$conc_num_factor), y = .9, label = df_stats_rf$NG) + + annotate("text", x = unique(df2_rf$conc_num_factor), y = .8, label = df_stats_rf$DB) + + annotate("text", x = unique(df2_rf$conc_num_factor), y = .7, label = df_stats_rf$SM) + + theme_Publication() + + # Create AUC statistics scatter plot + plot_auc_stats <- ggplot(df2_rf, aes(conc_num_factor, AUC)) + + geom_point(position = "jitter", size = 1) + + stat_summary( + fun = mean, + fun.min = ~ mean(.) - sd(.), + fun.max = ~ mean(.) + sd(.), + geom = "errorbar", color = "red" + ) + + stat_summary(fun = mean, geom = "point", color = "red") + + scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + + ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) + + coord_cartesian(ylim = c(0, 12500)) + + annotate("text", x = -0.25, y = 11000, label = "NG") + + annotate("text", x = -0.25, y = 10000, label = "DB") + + annotate("text", x = -0.25, y = 9000, label = "SM") + + annotate("text", x = unique(df2_rf$conc_num_factor), y = 11000, label = df_stats_rf$NG) + + annotate("text", x = unique(df2_rf$conc_num_factor), y = 10000, label = df_stats_rf$DB) + + annotate("text", x = unique(df2_rf$conc_num_factor), y = 9000, label = df_stats_rf$SM) + + theme_Publication() + + # Arrange and plot scatter plots + grid.arrange(plot_l_stats, plot_k_stats, plot_r_stats, plot_auc_stats, ncol = 2, nrow = 2) + + # Create box plots for each statistic + plot_l_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), L)) + + geom_boxplot() + + scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + + ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) + + coord_cartesian(ylim = c(0, 160)) + + theme_Publication() + + plot_k_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), K)) + + geom_boxplot() + + scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + + ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) + + coord_cartesian(ylim = c(0, 130)) + + theme_Publication() + + plot_r_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), r)) + + geom_boxplot() + + scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + + ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) + + coord_cartesian(ylim = c(0, 1)) + + theme_Publication() + + plot_auc_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), AUC)) + + geom_boxplot() + + scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + + ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) + + coord_cartesian(ylim = c(0, 12500)) + + theme_Publication() + + # Arrange and plot box plots + grid.arrange(plot_l_stats_box, plot_k_stats_box, plot_r_stats_box, plot_auc_stats_box, ncol = 2, nrow = 2) + + # Loop to arrange and print combined plots + plot_indices <- seq(1, (num_genes - 1), by = 3) + for (m in seq_along(plot_indices)) { + grid.arrange( + p_l[[plot_indices[m]]], p_k[[plot_indices[m]]], p_r[[plot_indices[m]]], p_auc[[plot_indices[m]]], + p_l[[plot_indices[m] + 1]], p_k[[plot_indices[m] + 1]], p_r[[plot_indices[m] + 1]], p_auc[[plot_indices[m] + 1]], + p_l[[plot_indices[m] + 2]], p_k[[plot_indices[m] + 2]], p_r[[plot_indices[m] + 2]], p_auc[[plot_indices[m] + 2]], + ncol = 4, nrow = 3 + ) + } + + # Handle leftover plots if num_genes is not a multiple of 3 + remaining_plots <- num_genes - max(plot_indices + 2) + if (remaining_plots > 0) { + plot_grid_list <- lapply(seq_len(remaining_plots), function(i) { + list(p_l[[plot_indices[length(plot_indices)] + i]], p_k[[plot_indices[length(plot_indices)] + i]], p_r[[plot_indices[length(plot_indices)] + i]], p_auc[[plot_indices[length(plot_indices)] + i]]) + }) + do.call(grid.arrange, c(plot_grid_list, list(ncol = 4, nrow = 3))) + } + + dev.off() + + # Additional PDF output for RF interaction plots + # Generate PDF for RF interaction plots + pdf(file.path(output_dir, "RF_InteractionPlots.pdf"), width = 16, height = 16, onefile = TRUE) + + # Summarize stats for RF data + df_stats_rf <- df2_rf %>% + group_by(conc_num, conc_num_factor) %>% + summarise( + mean_L = mean(L, na.rm = TRUE), + median_L = median(L, na.rm = TRUE), + max_L = max(L, na.rm = TRUE), + min_L = min(L, na.rm = TRUE), + sd_L = sd(L, na.rm = TRUE), + mean_K = mean(K, na.rm = TRUE), + median_K = median(K, na.rm = TRUE), + max_K = max(K, na.rm = TRUE), + min_K = min(K, na.rm = TRUE), + sd_K = sd(K, na.rm = TRUE), + mean_r = mean(r, na.rm = TRUE), + median_r = median(r, na.rm = TRUE), + max_r = max(r, na.rm = TRUE), + min_r = min(r, na.rm = TRUE), + sd_r = sd(r, na.rm = TRUE), + mean_AUC = mean(AUC, na.rm = TRUE), + median_AUC = median(AUC, na.rm = TRUE), + max_AUC = max(AUC, na.rm = TRUE), + min_AUC = min(AUC, na.rm = TRUE), + sd_AUC = sd(AUC, na.rm = TRUE), + NG = sum(NG, na.rm = TRUE), + DB = sum(DB, na.rm = TRUE), + SM = sum(SM, na.rm = TRUE) + ) + + # Create L statistics scatter plot for RF data + plot_rf_l_stats <- ggplot(df2_rf, aes(conc_num_factor, L)) + + geom_point(position = "jitter", size = 1) + + stat_summary( + fun = mean, + fun.min = ~ mean(.) - sd(.), + fun.max = ~ mean(.) + sd(.), + geom = "errorbar", color = "red" + ) + + stat_summary(fun = mean, geom = "point", color = "red") + + scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + + ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) + + coord_cartesian(ylim = c(0, 130)) + + annotate("text", x = -0.25, y = 10, label = "NG") + + annotate("text", x = -0.25, y = 5, label = "DB") + + annotate("text", x = -0.25, y = 0, label = "SM") + + annotate("text", x = unique(df2_rf$conc_num_factor), y = 10, label = df_stats_rf$NG) + + annotate("text", x = unique(df2_rf$conc_num_factor), y = 5, label = df_stats_rf$DB) + + annotate("text", x = unique(df2_rf$conc_num_factor), y = 0, label = df_stats_rf$SM) + + theme_Publication() + + # Create K statistics scatter plot for RF data + plot_rf_k_stats <- ggplot(df2_rf, aes(conc_num_factor, K)) + + geom_point(position = "jitter", size = 1) + + stat_summary( + fun = mean, + fun.min = ~ mean(.) - sd(.), + fun.max = ~ mean(.) + sd(.), + geom = "errorbar", color = "red" + ) + + stat_summary(fun = mean, geom = "point", color = "red") + + scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + + ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) + + coord_cartesian(ylim = c(-20, 160)) + + annotate("text", x = -0.25, y = -5, label = "NG") + + annotate("text", x = -0.25, y = -12.5, label = "DB") + + annotate("text", x = -0.25, y = -20, label = "SM") + + annotate("text", x = unique(df2_rf$conc_num_factor), y = -5, label = df_stats_rf$NG) + + annotate("text", x = unique(df2_rf$conc_num_factor), y = -12.5, label = df_stats_rf$DB) + + annotate("text", x = unique(df2_rf$conc_num_factor), y = -20, label = df_stats_rf$SM) + + theme_Publication() + + # Create r statistics scatter plot for RF data + plot_rf_r_stats <- ggplot(df2_rf, aes(conc_num_factor, r)) + + geom_point(position = "jitter", size = 1) + + stat_summary( + fun = mean, + fun.min = ~ mean(.) - sd(.), + fun.max = ~ mean(.) + sd(.), + geom = "errorbar", color = "red" + ) + + stat_summary(fun = mean, geom = "point", color = "red") + + scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + + ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) + + coord_cartesian(ylim = c(0, 1)) + + annotate("text", x = -0.25, y = .9, label = "NG") + + annotate("text", x = -0.25, y = .8, label = "DB") + + annotate("text", x = -0.25, y = .7, label = "SM") + + annotate("text", x = unique(df2_rf$conc_num_factor), y = .9, label = df_stats_rf$NG) + + annotate("text", x = unique(df2_rf$conc_num_factor), y = .8, label = df_stats_rf$DB) + + annotate("text", x = unique(df2_rf$conc_num_factor), y = .7, label = df_stats_rf$SM) + + theme_Publication() + + # Create AUC statistics scatter plot for RF data + plot_rf_auc_stats <- ggplot(df2_rf, aes(conc_num_factor, AUC)) + + geom_point(position = "jitter", size = 1) + + stat_summary( + fun = mean, + fun.min = ~ mean(.) - sd(.), + fun.max = ~ mean(.) + sd(.), + geom = "errorbar", color = "red" + ) + + stat_summary(fun = mean, geom = "point", color = "red") + + scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + + ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) + + coord_cartesian(ylim = c(0, 12500)) + + annotate("text", x = -0.25, y = 11000, label = "NG") + + annotate("text", x = -0.25, y = 10000, label = "DB") + + annotate("text", x = -0.25, y = 9000, label = "SM") + + annotate("text", x = unique(df2_rf$conc_num_factor), y = 11000, label = df_stats_rf$NG) + + annotate("text", x = unique(df2_rf$conc_num_factor), y = 10000, label = df_stats_rf$DB) + + annotate("text", x = unique(df2_rf$conc_num_factor), y = 9000, label = df_stats_rf$SM) + + theme_Publication() + + # Arrange and plot RF scatter plots + grid.arrange(plot_rf_l_stats, plot_rf_k_stats, plot_rf_r_stats, plot_rf_auc_stats, ncol = 2, nrow = 2) + + # Create box plots for each RF statistic + plot_rf_l_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), L)) + + geom_boxplot() + + scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + + ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) + + coord_cartesian(ylim = c(0, 130)) + + theme_Publication() + + plot_rf_k_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), K)) + + geom_boxplot() + + scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + + ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) + + coord_cartesian(ylim = c(0, 160)) + + theme_Publication() + + plot_rf_r_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), r)) + + geom_boxplot() + + scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + + ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) + + coord_cartesian(ylim = c(0, 1)) + + theme_Publication() + + plot_rf_auc_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), AUC)) + + geom_boxplot() + + scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) + + ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) + + coord_cartesian(ylim = c(0, 12500)) + + theme_Publication() + + # Arrange and plot RF box plots + grid.arrange(plot_rf_l_stats_box, plot_rf_k_stats_box, plot_rf_r_stats_box, plot_rf_auc_stats_box, ncol = 2, nrow = 2) + + # Loop to arrange and print combined RF plots + plot_indices_rf <- seq(1, (num_genes_RF - 1), by = 3) + for (m in seq_along(plot_indices_rf)) { + grid.arrange( + p_rf_l[[plot_indices_rf[m]]], p_rf_k[[plot_indices_rf[m]]], p_rf_r[[plot_indices_rf[m]]], p_rf_auc[[plot_indices_rf[m]]], + p_rf_l[[plot_indices_rf[m] + 1]], p_rf_k[[plot_indices_rf[m] + 1]], p_rf_r[[plot_indices_rf[m] + 1]], p_rf_auc[[plot_indices_rf[m] + 1]], + p_rf_l[[plot_indices_rf[m] + 2]], p_rf_k[[plot_indices_rf[m] + 2]], p_rf_r[[plot_indices_rf[m] + 2]], p_rf_auc[[plot_indices_rf[m] + 2]], + ncol = 4, nrow = 3 + ) + } + + # Handle leftover RF plots if num_genes_RF is not a multiple of 3 + remaining_rf_plots <- num_genes_RF - max(plot_indices_rf + 2) + if (remaining_rf_plots > 0) { + plot_grid_rf_list <- lapply(seq_len(remaining_rf_plots), function(i) { + list(p_rf_l[[plot_indices_rf[length(plot_indices_rf)] + i]], p_rf_k[[plot_indices_rf[length(plot_indices_rf)] + i]], p_rf_r[[plot_indices_rf[length(plot_indices_rf)] + i]], p_rf_auc[[plot_indices_rf[length(plot_indices_rf)] + i]]) + }) + do.call(grid.arrange, c(plot_grid_rf_list, list(ncol = 4, nrow = 3))) + } + dev.off() +} + +# Calculate linear models and R-squared values for all CPPs in results 1 vs results 2 +lm_list <- list( + lm(Z_lm_K ~ Z_lm_L, data = df_na_rm), + lm(Z_lm_r ~ Z_lm_L, data = df_na_rm), + lm(Z_lm_AUC ~ Z_lm_L, data = df_na_rm), + lm(Z_lm_r ~ Z_lm_K, data = df_na_rm), + lm(Z_lm_AUC ~ Z_lm_K, data = df_na_rm), + lm(Z_lm_AUC ~ Z_lm_r, data = df_na_rm) +) + +lm_summaries <- lapply(lm_list, summary) + +# Create PDF for correlation plots of CPPs +pdf(file.path(output_dir, "Correlation_CPPs.pdf"), width = 10, height = 7, onefile = TRUE) + +# Generate correlation plots for each combination +plot_list <- list( + ggplot(df_na_rm, aes(Z_lm_L, Z_lm_K)) + + geom_point(shape = 3, color = "gray70") + + geom_smooth(method = "lm", color = "tomato3") + + ggtitle("Interaction L vs. Interaction K") + + xlab("z-score L") + ylab("z-score K") + + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[1]]$r.squared, 3))) + + theme_Publication_legend_right() + + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)), + + ggplot(df_na_rm, aes(Z_lm_L, Z_lm_r)) + + geom_point(shape = 3, color = "gray70") + + geom_smooth(method = "lm", color = "tomato3") + + ggtitle("Interaction L vs. Interaction r") + + xlab("z-score L") + ylab("z-score r") + + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[2]]$r.squared, 3))) + + theme_Publication_legend_right() + + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)), + + ggplot(df_na_rm, aes(Z_lm_L, Z_lm_AUC)) + + geom_point(shape = 3, color = "gray70") + + geom_smooth(method = "lm", color = "tomato3") + + ggtitle("Interaction L vs. Interaction AUC") + + xlab("z-score L") + ylab("z-score AUC") + + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[3]]$r.squared, 3))) + + theme_Publication_legend_right() + + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)), + + ggplot(df_na_rm, aes(Z_lm_K, Z_lm_r)) + + geom_point(shape = 3, color = "gray70") + + geom_smooth(method = "lm", color = "tomato3") + + ggtitle("Interaction K vs. Interaction r") + + xlab("z-score K") + ylab("z-score r") + + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[4]]$r.squared, 3))) + + theme_Publication_legend_right() + + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)), + + ggplot(df_na_rm, aes(Z_lm_K, Z_lm_AUC)) + + geom_point(shape = 3, color = "gray70") + + geom_smooth(method = "lm", color = "tomato3") + + ggtitle("Interaction K vs. Interaction AUC") + + xlab("z-score K") + ylab("z-score AUC") + + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[5]]$r.squared, 3))) + + theme_Publication_legend_right() + + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)), + + ggplot(df_na_rm, aes(Z_lm_r, Z_lm_AUC)) + + geom_point(shape = 3, color = "gray70") + + geom_smooth(method = "lm", color = "tomato3") + + ggtitle("Interaction r vs. Interaction AUC") + + xlab("z-score r") + ylab("z-score AUC") + + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[6]]$r.squared, 3))) + + theme_Publication_legend_right() + + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)) +) + +# Print all correlation plots to the PDF +lapply(plot_list, print) + +# Create additional plots with InteractionScores_RF highlighted in cyan +interaction_scores_rf_filtered <- interaction_scores_rf[!is.na(interaction_scores_rf$Z_lm_L), ] + +highlighted_plot_list <- list( + ggplot(df_na_rm, aes(Z_lm_L, Z_lm_K)) + + geom_point(shape = 3, color = "gray70") + + geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_L, Z_lm_K), color = "cyan") + + ggtitle("Interaction L vs. Interaction K") + + xlab("z-score L") + ylab("z-score K") + + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[1]]$r.squared, 3))) + + theme_Publication_legend_right() + + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)), + + ggplot(df_na_rm, aes(Z_lm_L, Z_lm_r)) + + geom_point(shape = 3, color = "gray70") + + geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_L, Z_lm_r), color = "cyan") + + ggtitle("Interaction L vs. Interaction r") + + xlab("z-score L") + ylab("z-score r") + + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[2]]$r.squared, 3))) + + theme_Publication_legend_right() + + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)), + + ggplot(df_na_rm, aes(Z_lm_L, Z_lm_AUC)) + + geom_point(shape = 3, color = "gray70") + + geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_L, Z_lm_AUC), color = "cyan") + + ggtitle("Interaction L vs. Interaction AUC") + + xlab("z-score L") + ylab("z-score AUC") + + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[3]]$r.squared, 3))) + + theme_Publication_legend_right() + + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)), + + ggplot(df_na_rm, aes(Z_lm_K, Z_lm_r)) + + geom_point(shape = 3, color = "gray70") + + geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_K, Z_lm_r), color = "cyan") + + ggtitle("Interaction K vs. Interaction r") + + xlab("z-score K") + ylab("z-score r") + + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[4]]$r.squared, 3))) + + theme_Publication_legend_right() + + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)), + + ggplot(df_na_rm, aes(Z_lm_K, Z_lm_AUC)) + + geom_point(shape = 3, color = "gray70") + + geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_K, Z_lm_AUC), color = "cyan") + + ggtitle("Interaction K vs. Interaction AUC") + + xlab("z-score K") + ylab("z-score AUC") + + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[5]]$r.squared, 3))) + + theme_Publication_legend_right() + + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)), + + ggplot(df_na_rm, aes(Z_lm_r, Z_lm_AUC)) + + geom_point(shape = 3, color = "gray70") + + geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_r, Z_lm_AUC), color = "cyan") + + ggtitle("Interaction r vs. Interaction AUC") + + xlab("z-score r") + ylab("z-score AUC") + + annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[6]]$r.squared, 3))) + + theme_Publication_legend_right() + + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)) +) + +# Print all highlighted plots to the PDF +lapply(highlighted_plot_list, print) + +dev.off()