Auto-commit: apps/r/calculate_interaction_zscores5.R

2024-09-01 03:03:53 -04:00
parent ed1e3cf756
commit 05cd1a85d8
1 changed file with 30 additions and 727 deletions
--- a/workflow/apps/r/calculate_interaction_zscores5.R
+++ b/workflow/apps/r/calculate_interaction_zscores5.R
@@ -133,13 +133,12 @@ load_and_process_data <- function(easy_results_file, sd = 3) {
      DB = if_else(delta_bg >= delta_bg_tolerance, 1, 0),
      OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep),
      conc_num = as.numeric(gsub("[^0-9\\.]", "", Conc)),
-      conc_num_factor = as.numeric(as.factor(conc_num)) - 1,
+      conc_num_factor = as.numeric(as.factor(conc_num)) - 1)
      max_conc = max(conc_num_factor))
  return(df)
 }
-# Function to update Gene names using the SGD gene list
+# Update Gene names using the SGD gene list
 update_gene_names <- function(df, sgd_gene_list) {
  genes <- read.delim(file = sgd_gene_list,
    quote = "", header = FALSE,
@@ -220,7 +219,7 @@ calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "c
  return(summary_stats)
 }
-# Function to calculate L values within and outside 2SD of K
+# Calculate L values within and outside 2SD of K
 calculate_l_2sd_of_k <- function(df, df_stats_by_k) {
  # Join the statistics to the main dataframe
  df_joined <- df %>%
@@ -260,7 +259,7 @@ save_plots <- function(file_name, plot_list, output_dir) {
  })
 }
-# Function to calculate background strain mean values
+# Calculate background strain mean values
 calculate_background_means <- function(df_stats_by_l, df_stats_by_k, df_stats_by_r, df_stats_by_auc) {
  list(
    L = df_stats_by_l %>% filter(conc_num_factor == 0) %>% pull(mean_L),
@@ -270,24 +269,31 @@ calculate_background_means <- function(df_stats_by_l, df_stats_by_k, df_stats_by
  )
 }
-# Function to process strains (deletion and reference)
+# Process strains (deletion and reference)
+#
+# For every concentration above 0, caps L at the theoretical maximum (max_L
+# taken from l_within_2sd_k for that concentration level) and sets SM = 1 on
+# the rows that reach the cap. Rows where L == 0 are first set to the cap.
+# Rows at concentration 0 are passed through unchanged.
+#
+# Args:
+#   df: data frame with conc_num, conc_num_factor, L and SM columns.
+#   l_within_2sd_k: summary table with conc_num_factor and max_L columns.
+#   strain, output_dir: not used in the body; kept for existing callers.
+# Returns:
+#   The rows of df, stacked concentration by concentration, with L and SM adjusted.
-process_strains <- function(df, df_stats_by_l_within_2sd_k, strain, output_dir) {
+process_strains <- function(df, l_within_2sd_k, strain, output_dir) {
  df_strains <- data.frame()  # Initialize an empty dataframe to store results
  for (concentration in unique(df$conc_num)) {
    df_temp <- df %>% filter(conc_num == concentration)
    if (concentration > 0) {
-      max_l_theoretical <- df_stats_by_l_within_2sd_k %>% filter(conc_num_factor == concentration) %>% pull(max_L)
-        filter(conc_num_factor == concentration) %>%
+      # `concentration` is a raw conc_num value, whereas the stats table is keyed
+      # by conc_num_factor (the 0-based level index), so take the level from the
+      # rows being processed instead of comparing the two scales directly.
+      max_l_theoretical <- l_within_2sd_k %>%
+        filter(conc_num_factor %in% unique(df_temp$conc_num_factor)) %>%
        pull(max_L)
+      # A missing or ambiguous cap would otherwise surface as an opaque
+      # recycling error inside mutate().
+      if (length(max_l_theoretical) != 1) {
+        stop("process_strains: expected exactly one max_L for concentration ",
+          concentration, ", found ", length(max_l_theoretical), call. = FALSE)
+      }
      df_temp <- df_temp %>%
        mutate(
-          L = ifelse(L == 0 & !is.na(L), max_L_theoretical, L),
+          L = ifelse(L == 0 & !is.na(L), max_l_theoretical, L),
          SM = ifelse(L >= max_l_theoretical & !is.na(L), 1, SM),
          L = ifelse(L >= max_l_theoretical & !is.na(L), max_l_theoretical, L)
        )
    }
-    df_strains <- bind_rows(df, df_temp)
+    df_strains <- bind_rows(df_strains, df_temp)  # Append the results of this concentration
  }
  return(df_strains)
 }
 calculate_interaction_scores <- function(df, df_stats_by_l, df_stats_by_k,
  df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) {
@@ -499,7 +505,6 @@ create_ranked_plots <- function(df, output_dir) {
 }
 main <- function() {
  # Applying to all experiments
  lapply(names(args$experiments), function(exp_name) {
@@ -515,6 +520,8 @@ main <- function() {
    df <- load_and_process_data(args$easy_results_file, exp_sd)
    df <- update_gene_names(df, args$sgd_gene_list)
    max_conc <- max(df$conc_num_factor)
    # QC
    # Filter the df above sd tolerance
    df_above_tolerance <- df %>% filter(DB == 1)
@@ -569,10 +576,10 @@ main <- function() {
      # Recalculate summary statistics for the background strain
      stats_background <- calculate_summary_stats(df_background, variables, group_vars = c("OrfRep", "Gene", "conc_num", "conc_num_factor"))
-      stats_by_l_background <- df_stats_background %>% select(starts_with("L_"), "OrfRep", "conc_num", "conc_num_factor")
+      stats_by_l_background <- stats_background %>% select(starts_with("L_"), "OrfRep", "conc_num", "conc_num_factor")
-      stats_by_k_background <- df_stats_background %>% select(starts_with("K_"), "OrfRep", "conc_num", "conc_num_factor")
+      stats_by_k_background <- stats_background %>% select(starts_with("K_"), "OrfRep", "conc_num", "conc_num_factor")
-      stats_by_r_background <- df_stats_background %>% select(starts_with("r_"), "OrfRep", "conc_num", "conc_num_factor")
+      stats_by_r_background <- stats_background %>% select(starts_with("r_"), "OrfRep", "conc_num", "conc_num_factor")
-      stats_by_auc_background <- df_stats_background %>% select(starts_with("AUC_"), "OrfRep", "conc_num", "conc_num_factor")
+      stats_by_auc_background <- stats_background %>% select(starts_with("AUC_"), "OrfRep", "conc_num", "conc_num_factor")
      # Backup in case previous block explodes
      # Combine all summary statistics into one dataframe
@@ -617,8 +624,8 @@ main <- function() {
        filter(OrfRep != strain) %>%
        mutate(SM = 0)
-      df_reference_strains <- process_strains(df_reference, stats_by_l_within_2sd_k, strain, out_dir)
+      df_reference_strains <- process_strains(df_reference, l_within_2sd_k, strain, out_dir)
-      df_deletion_strains <- process_strains(df_deletion, stats_by_l_within_2sd_k, strain, out_dir)
+      df_deletion_strains <- process_strains(df_deletion, l_within_2sd_k, strain, out_dir)
      variables <- c("L", "K", "r", "AUC")
@@ -626,17 +633,16 @@ main <- function() {
      # Change OrfRep to include the reference strain, gene, and Num so each RF gets its own score
      # df_reference_strains <- df_reference_strains %>%
      #   mutate(OrfRep = paste(OrfRep, Gene, num, sep = "_"))
-
+      # We are leaving OrfRep unchanged and using group_by(OrfRep, Gene, num) by default
      # This is synonymous with the legacy OrfRep mutation
      # Use group_by in functions in lieu of mutating OrfRep
      # default_group_vars <- c("OrfRep", "Gene", "num")
-      # Use group_by in functions in lieu of mutating OrfRep
-      # We are leaving OrfRep unchanged and using group_by(OrfRep, Gene, num) by default
-      reference_results <- calculate_interaction_scores(df_reference, stats_by_l, stats_by_k, stats_by_r, stats_by_auc,
-        background_means, max_conc, variables)
+      # Score the strains returned by process_strains(). Re-scoring the raw
+      # df_reference afterwards would overwrite reference_results and discard
+      # that processing step, so the old call is removed rather than kept.
+      reference_results <- calculate_interaction_scores(df_reference_strains, stats_by_l,
+        stats_by_k, stats_by_r, stats_by_auc, background_means, max_conc, variables)
-      deletion_results <- calculate_interaction_scores(df_deletion, stats_by_l, stats_by_k, stats_by_r, stats_by_auc,
-        background_means, max_conc, variables)
+      deletion_results <- calculate_interaction_scores(df_deletion_strains, stats_by_l,
+        stats_by_k, stats_by_r, stats_by_auc, background_means, max_conc, variables)
      zscores_calculations_reference <- reference_results$zscores_calculations
      zscores_interactions_reference <- reference_results$zscores_interactions
@@ -726,706 +732,3 @@ main <- function() {
  })
 }
 main()
 # NEEDS REFACTORING
 # for (s in background_strains) {
 #   Initialize empty plots (placeholder for future plotting)
 #   p_l <- ggplot()
 #   p_k <- ggplot()
 #   p_r <- ggplot()
 #   p_auc <- ggplot()
 #   p_rf_l <- ggplot()
 #   p_rf_k <- ggplot()
 #   p_rf_r <- ggplot()
 #   p_rf_auc <- ggplot()
 #   # Generate ggplot objects for each RF strain
 #   for (i in seq_len(num_genes_reference)) {
 #     gene_sel <- unique(interaction_scores_reference$OrfRep)[i]
 #     df_z_calculations <- df_stats_interaction_all_RF %>% filter(OrfRep == gene_sel)
 #     df_int_scores <- interaction_scores_RF %>% filter(OrfRep == gene_sel)
 #     p_rf_l[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_L)) +
 #       geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
 #       coord_cartesian(ylim = c(-65, 65)) +
 #       geom_errorbar(aes(ymin = 0 - (2 * WT_sd_l), ymax = 0 + (2 * WT_sd_l)), alpha = 0.3) +
 #       ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = "      ")) +
 #       annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$Z_Shift_L, 2))) +
 #       annotate("text", x = 1, y = 25, label = paste("lm Zscore =", round(df_int_scores$Z_lm_L, 2))) +
 #       annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) +
 #       annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) +
 #       annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) +
 #       scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor),
 #         labels = unique(as.character(df_z_calculations$conc_num))) +
 #       scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) +
 #       theme_publication()
 #     p_rf_k[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_K)) +
 #       geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
 #       coord_cartesian(ylim = c(-65, 65)) +
 #       geom_errorbar(aes(ymin = 0 - (2 * WT_sd_K), ymax = 0 + (2 * WT_sd_K)), alpha = 0.3) +
 #       ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = "      ")) +
 #       annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$Z_Shift_K, 2))) +
 #       annotate("text", x = 1, y = 25, label = paste("lm Zscore =", round(df_int_scores$Z_lm_K, 2))) +
 #       annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) +
 #       annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) +
 #       annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) +
 #       scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor),
 #         labels = unique(as.character(df_z_calculations$conc_num))) +
 #       scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) +
 #       theme_publication()
 #     p_rf_r[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_r)) +
 #       geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
 #       coord_cartesian(ylim = c(-0.65, 0.65)) +
 #       geom_errorbar(aes(ymin = 0 - (2 * WT_sd_r), ymax = 0 + (2 * WT_sd_r)), alpha = 0.3) +
 #       ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = "      ")) +
 #       annotate("text", x = 1, y = 0.45, label = paste("ZShift =", round(df_int_scores$Z_Shift_r, 2))) +
 #       annotate("text", x = 1, y = 0.25, label = paste("lm Zscore =", round(df_int_scores$Z_lm_r, 2))) +
 #       annotate("text", x = 1, y = -0.25, label = paste("NG =", df_int_scores$NG)) +
 #       annotate("text", x = 1, y = -0.35, label = paste("DB =", df_int_scores$DB)) +
 #       annotate("text", x = 1, y = -0.45, label = paste("SM =", df_int_scores$SM)) +
 #       scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor),
 #         labels = unique(as.character(df_z_calculations$conc_num))) +
 #       scale_y_continuous(breaks = c(-0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6)) +
 #       theme_publication()
 #     p_rf_auc[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_AUC)) +
 #       geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
 #       coord_cartesian(ylim = c(-6500, 6500)) +
 #       geom_errorbar(aes(ymin = 0 - (2 * WT_sd_AUC), ymax = 0 + (2 * WT_sd_AUC)), alpha = 0.3) +
 #       ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = "      ")) +
 #       annotate("text", x = 1, y = 4500, label = paste("ZShift =", round(df_int_scores$Z_Shift_AUC, 2))) +
 #       annotate("text", x = 1, y = 2500, label = paste("lm Zscore =", round(df_int_scores$Z_lm_AUC, 2))) +
 #       annotate("text", x = 1, y = -2500, label = paste("NG =", df_int_scores$NG)) +
 #       annotate("text", x = 1, y = -3500, label = paste("DB =", df_int_scores$DB)) +
 #       annotate("text", x = 1, y = -4500, label = paste("SM =", df_int_scores$SM)) +
 #       scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor),
 #         labels = unique(as.character(df_z_calculations$conc_num))) +
 #       scale_y_continuous(breaks = c(-6000, -5000, -4000, -3000, -2000, -1000, 0, 1000, 2000, 3000, 4000, 5000, 6000)) +
 #       theme_publication()
 #   # Loop through each gene to generate plots
 #   for (i in 1:num_genes) {
 #     gene_sel <- unique(interaction_scores_deletion$OrfRep)[i]
 #     df_z_calculations <- df_stats_interaction_all %>% filter(OrfRep == gene_sel)
 #     df_int_scores <- interaction_scores_deletion %>% filter(OrfRep == gene_sel)
 #     p_l[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_L)) +
 #       geom_point() +
 #       geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
 #       coord_cartesian(ylim = c(-65, 65)) +
 #       geom_errorbar(aes(ymin = 0 - (2 * WT_sd_l), ymax = 0 + (2 * WT_sd_l)), alpha = 0.3) +
 #       ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = "      ")) +
 #       annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$Z_Shift_L, 2))) +
 #       annotate("text", x = 1, y = 25, label = paste("Z lm Score =", round(df_int_scores$Z_lm_L, 2))) +
 #       annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) +
 #       annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) +
 #       annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) +
 #       scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor),
 #         labels = unique(as.character(df_z_calculations$conc_num))) +
 #       scale_y_continuous(breaks = seq(-60, 60, 10)) +
 #       theme_Publication()
 #     p_k[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_K)) +
 #       geom_point() +
 #       geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
 #       coord_cartesian(ylim = c(-65, 65)) +
 #       geom_errorbar(aes(ymin = 0 - (2 * WT_sd_K), ymax = 0 + (2 * WT_sd_K)), alpha = 0.3) +
 #       ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = "      ")) +
 #       annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$Z_Shift_K, 2))) +
 #       annotate("text", x = 1, y = 25, label = paste("Z lm Score =", round(df_int_scores$Z_lm_K, 2))) +
 #       annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) +
 #       annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) +
 #       annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) +
 #       scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor),
 #         labels = unique(as.character(df_z_calculations$conc_num))) +
 #       scale_y_continuous(breaks = seq(-60, 60, 10)) +
 #       theme_Publication()
 #     p_r[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_r)) +
 #       geom_point() +
 #       geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
 #       coord_cartesian(ylim = c(-0.65, 0.65)) +
 #       geom_errorbar(aes(ymin = 0 - (2 * WT_sd_r), ymax = 0 + (2 * WT_sd_r)), alpha = 0.3) +
 #       ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = "      ")) +
 #       annotate("text", x = 1, y = 0.45, label = paste("ZShift =", round(df_int_scores$Z_Shift_r, 2))) +
 #       annotate("text", x = 1, y = 0.25, label = paste("Z lm Score =", round(df_int_scores$Z_lm_r, 2))) +
 #       annotate("text", x = 1, y = -0.25, label = paste("NG =", df_int_scores$NG)) +
 #       annotate("text", x = 1, y = -0.35, label = paste("DB =", df_int_scores$DB)) +
 #       annotate("text", x = 1, y = -0.45, label = paste("SM =", df_int_scores$SM)) +
 #       scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor),
 #         labels = unique(as.character(df_z_calculations$conc_num))) +
 #       scale_y_continuous(breaks = seq(-0.6, 0.6, 0.2)) +
 #       theme_Publication()
 #     p_auc[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_AUC)) +
 #       geom_point() +
 #       geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
 #       coord_cartesian(ylim = c(-6500, 6500)) +
 #       geom_errorbar(aes(ymin = 0 - (2 * WT_sd_AUC), ymax = 0 + (2 * WT_sd_AUC)), alpha = 0.3) +
 #       ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = "      ")) +
 #       annotate("text", x = 1, y = 4500, label = paste("ZShift =", round(df_int_scores$Z_Shift_AUC, 2))) +
 #       annotate("text", x = 1, y = 2500, label = paste("Z lm Score =", round(df_int_scores$Z_lm_AUC, 2))) +
 #       annotate("text", x = 1, y = -2500, label = paste("NG =", df_int_scores$NG)) +
 #       annotate("text", x = 1, y = -3500, label = paste("DB =", df_int_scores$DB)) +
 #       annotate("text", x = 1, y = -4500, label = paste("SM =", df_int_scores$SM)) +
 #       scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor),
 #         labels = unique(as.character(df_z_calculations$conc_num))) +
 #       scale_y_continuous(breaks = seq(-6000, 6000, 1000)) +
 #       theme_Publication()
 #     if (i == 1) {
 #       df_stats_interaction_all_final <- df_z_calculations
 #     } else {
 #       df_stats_interaction_all_final <- bind_rows(df_stats_interaction_all_final, df_z_calculations)
 #     }
 #   }
 #   print("Pass Int ggplot loop")
 #   write.csv(df_stats_interaction_all_final, file = file.path(output_dir, "ZScore_Calculations.csv"), row.names = FALSE)
 #   # Generate a blank plot for alignment purposes
 #   blank_plot <- ggplot(df2_rf) + geom_blank()
 #   # Create PDF for interaction plots
 #   pdf(file.path(output_dir, "InteractionPlots.pdf"), width = 16, height = 16, onefile = TRUE)
 #   # Summarize stats for the RF data (df2_rf)
 #   df_stats_rf <- df2_rf %>%
 #     group_by(conc_num, conc_num_factor) %>%
 #     summarise(
 #       mean_L = mean(L, na.rm = TRUE),
 #       median_L = median(L, na.rm = TRUE),
 #       max_L = max(L, na.rm = TRUE),
 #       min_L = min(L, na.rm = TRUE),
 #       sd_L = sd(L, na.rm = TRUE),
 #       mean_K = mean(K, na.rm = TRUE),
 #       median_K = median(K, na.rm = TRUE),
 #       max_K = max(K, na.rm = TRUE),
 #       min_K = min(K, na.rm = TRUE),
 #       sd_K = sd(K, na.rm = TRUE),
 #       mean_r = mean(r, na.rm = TRUE),
 #       median_r = median(r, na.rm = TRUE),
 #       max_r = max(r, na.rm = TRUE),
 #       min_r = min(r, na.rm = TRUE),
 #       sd_r = sd(r, na.rm = TRUE),
 #       mean_AUC = mean(AUC, na.rm = TRUE),
 #       median_AUC = median(AUC, na.rm = TRUE),
 #       max_AUC = max(AUC, na.rm = TRUE),
 #       min_AUC = min(AUC, na.rm = TRUE),
 #       sd_AUC = sd(AUC, na.rm = TRUE),
 #       NG = sum(NG, na.rm = TRUE),
 #       DB = sum(DB, na.rm = TRUE),
 #       SM = sum(SM, na.rm = TRUE)
 #     )
 #   # Create L statistics scatter plot
 #   plot_l_stats <- ggplot(df2_rf, aes(conc_num_factor, L)) +
 #     geom_point(position = "jitter", size = 1) +
 #     stat_summary(
 #       fun = mean,
 #       fun.min = ~ mean(.) - sd(.),
 #       fun.max = ~ mean(.) + sd(.),
 #       geom = "errorbar", color = "red"
 #     ) +
 #     stat_summary(fun = mean, geom = "point", color = "red") +
 #     scale_x_continuous(name = unique(df$Drug[1]),
 #       breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
 #     ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) +
 #     coord_cartesian(ylim = c(0, 160)) +
 #     annotate("text", x = -0.25, y = 10, label = "NG") +
 #     annotate("text", x = -0.25, y = 5, label = "DB") +
 #     annotate("text", x = -0.25, y = 0, label = "SM") +
 #     annotate("text", x = unique(df2_rf$conc_num_factor), y = 10, label = df_stats_rf$NG) +
 #     annotate("text", x = unique(df2_rf$conc_num_factor), y = 5, label = df_stats_rf$DB) +
 #     annotate("text", x = unique(df2_rf$conc_num_factor), y = 0, label = df_stats_rf$SM) +
 #     theme_Publication()
 #   # Create K statistics scatter plot
 #   plot_k_stats <- ggplot(df2_rf, aes(conc_num_factor, K)) +
 #     geom_point(position = "jitter", size = 1) +
 #     stat_summary(
 #       fun = mean,
 #       fun.min = ~ mean(.) - sd(.),
 #       fun.max = ~ mean(.) + sd(.),
 #       geom = "errorbar", color = "red"
 #     ) +
 #     stat_summary(fun = mean, geom = "point", color = "red") +
 #     scale_x_continuous(name = unique(df$Drug[1]),
 #       breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
 #     ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) +
 #     coord_cartesian(ylim = c(-20, 160)) +
 #     annotate("text", x = -0.25, y = -5, label = "NG") +
 #     annotate("text", x = -0.25, y = -12.5, label = "DB") +
 #     annotate("text", x = -0.25, y = -20, label = "SM") +
 #     annotate("text", x = unique(df2_rf$conc_num_factor), y = -5, label = df_stats_rf$NG) +
 #     annotate("text", x = unique(df2_rf$conc_num_factor), y = -12.5, label = df_stats_rf$DB) +
 #     annotate("text", x = unique(df2_rf$conc_num_factor), y = -20, label = df_stats_rf$SM) +
 #     theme_Publication()
 #   # Create r statistics scatter plot
 #   plot_r_stats <- ggplot(df2_rf, aes(conc_num_factor, r)) +
 #     geom_point(position = "jitter", size = 1) +
 #     stat_summary(
 #       fun = mean,
 #       fun.min = ~ mean(.) - sd(.),
 #       fun.max = ~ mean(.) + sd(.),
 #       geom = "errorbar", color = "red"
 #     ) +
 #     stat_summary(fun = mean, geom = "point", color = "red") +
 #     scale_x_continuous(name = unique(df$Drug[1]),
 #       breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
 #     ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) +
 #     coord_cartesian(ylim = c(0, 1)) +
 #     annotate("text", x = -0.25, y = .9, label = "NG") +
 #     annotate("text", x = -0.25, y = .8, label = "DB") +
 #     annotate("text", x = -0.25, y = .7, label = "SM") +
 #     annotate("text", x = unique(df2_rf$conc_num_factor), y = .9, label = df_stats_rf$NG) +
 #     annotate("text", x = unique(df2_rf$conc_num_factor), y = .8, label = df_stats_rf$DB) +
 #     annotate("text", x = unique(df2_rf$conc_num_factor), y = .7, label = df_stats_rf$SM) +
 #     theme_Publication()
 #   # Create AUC statistics scatter plot
 #   plot_auc_stats <- ggplot(df2_rf, aes(conc_num_factor, AUC)) +
 #     geom_point(position = "jitter", size = 1) +
 #     stat_summary(
 #       fun = mean,
 #       fun.min = ~ mean(.) - sd(.),
 #       fun.max = ~ mean(.) + sd(.),
 #       geom = "errorbar", color = "red"
 #     ) +
 #     stat_summary(fun = mean, geom = "point", color = "red") +
 #     scale_x_continuous(name = unique(df$Drug[1]),
 #       breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
 #     ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) +
 #     coord_cartesian(ylim = c(0, 12500)) +
 #     annotate("text", x = -0.25, y = 11000, label = "NG") +
 #     annotate("text", x = -0.25, y = 10000, label = "DB") +
 #     annotate("text", x = -0.25, y = 9000, label = "SM") +
 #     annotate("text", x = unique(df2_rf$conc_num_factor), y = 11000, label = df_stats_rf$NG) +
 #     annotate("text", x = unique(df2_rf$conc_num_factor), y = 10000, label = df_stats_rf$DB) +
 #     annotate("text", x = unique(df2_rf$conc_num_factor), y = 9000, label = df_stats_rf$SM) +
 #     theme_Publication()
 #   # Arrange and plot scatter plots
 #   grid.arrange(plot_l_stats, plot_k_stats, plot_r_stats, plot_auc_stats, ncol = 2, nrow = 2)
 #   # Create box plots for each statistic
 #   plot_l_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), L)) +
 #     geom_boxplot() +
 #     scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
 #     ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) +
 #     coord_cartesian(ylim = c(0, 160)) +
 #     theme_Publication()
 #   plot_k_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), K)) +
 #     geom_boxplot() +
 #     scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
 #     ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) +
 #     coord_cartesian(ylim = c(0, 130)) +
 #     theme_Publication()
 #   plot_r_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), r)) +
 #     geom_boxplot() +
 #     scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
 #     ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) +
 #     coord_cartesian(ylim = c(0, 1)) +
 #     theme_Publication()
 #   plot_auc_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), AUC)) +
 #     geom_boxplot() +
 #     scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
 #     ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) +
 #     coord_cartesian(ylim = c(0, 12500)) +
 #     theme_Publication()
 #   # Arrange and plot box plots
 #   grid.arrange(plot_l_stats_box, plot_k_stats_box, plot_r_stats_box, plot_auc_stats_box, ncol = 2, nrow = 2)
 #   # Loop to arrange and print combined plots
 #   plot_indices <- seq(1, (num_genes - 1), by = 3)
 #   for (m in seq_along(plot_indices)) {
 #     grid.arrange(
 #       p_l[[plot_indices[m]]], p_k[[plot_indices[m]]], p_r[[plot_indices[m]]], p_auc[[plot_indices[m]]],
 #       p_l[[plot_indices[m] + 1]], p_k[[plot_indices[m] + 1]], p_r[[plot_indices[m] + 1]], p_auc[[plot_indices[m] + 1]],
 #       p_l[[plot_indices[m] + 2]], p_k[[plot_indices[m] + 2]], p_r[[plot_indices[m] + 2]], p_auc[[plot_indices[m] + 2]],
 #       ncol = 4, nrow = 3
 #     )
 #   }
 #   # Handle leftover plots if num_genes is not a multiple of 3
 #   remaining_plots <- num_genes - max(plot_indices + 2)
 #   if (remaining_plots > 0) {
 #     plot_grid_list <- lapply(seq_len(remaining_plots), function(i) {
 #       list(p_l[[plot_indices[length(plot_indices)] + i]],
 #         p_k[[plot_indices[length(plot_indices)] + i]],
 #         p_r[[plot_indices[length(plot_indices)] + i]],
 #         p_auc[[plot_indices[length(plot_indices)] + i]])
 #     })
 #     do.call(grid.arrange, c(plot_grid_list, list(ncol = 4, nrow = 3)))
 #   }
 #   dev.off()
 #   # Additional PDF output for RF interaction plots
 #   # Generate PDF for RF interaction plots
 #   pdf(file.path(output_dir, "RF_InteractionPlots.pdf"), width = 16, height = 16, onefile = TRUE)
 #   # Summarize stats for RF data
 #   df_stats_rf <- df2_rf %>%
 #     group_by(conc_num, conc_num_factor) %>%
 #     summarise(
 #       mean_L = mean(L, na.rm = TRUE),
 #       median_L = median(L, na.rm = TRUE),
 #       max_L = max(L, na.rm = TRUE),
 #       min_L = min(L, na.rm = TRUE),
 #       sd_L = sd(L, na.rm = TRUE),
 #       mean_K = mean(K, na.rm = TRUE),
 #       median_K = median(K, na.rm = TRUE),
 #       max_K = max(K, na.rm = TRUE),
 #       min_K = min(K, na.rm = TRUE),
 #       sd_K = sd(K, na.rm = TRUE),
 #       mean_r = mean(r, na.rm = TRUE),
 #       median_r = median(r, na.rm = TRUE),
 #       max_r = max(r, na.rm = TRUE),
 #       min_r = min(r, na.rm = TRUE),
 #       sd_r = sd(r, na.rm = TRUE),
 #       mean_AUC = mean(AUC, na.rm = TRUE),
 #       median_AUC = median(AUC, na.rm = TRUE),
 #       max_AUC = max(AUC, na.rm = TRUE),
 #       min_AUC = min(AUC, na.rm = TRUE),
 #       sd_AUC = sd(AUC, na.rm = TRUE),
 #       NG = sum(NG, na.rm = TRUE),
 #       DB = sum(DB, na.rm = TRUE),
 #       SM = sum(SM, na.rm = TRUE)
 #     )
 #   # Create L statistics scatter plot for RF data
 #   plot_rf_l_stats <- ggplot(df2_rf, aes(conc_num_factor, L)) +
 #     geom_point(position = "jitter", size = 1) +
 #     stat_summary(
 #       fun = mean,
 #       fun.min = ~ mean(.) - sd(.),
 #       fun.max = ~ mean(.) + sd(.),
 #       geom = "errorbar", color = "red"
 #     ) +
 #     stat_summary(fun = mean, geom = "point", color = "red") +
 #     scale_x_continuous(name = unique(df$Drug[1]),
 #       breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
 #     ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) +
 #     coord_cartesian(ylim = c(0, 130)) +
 #     annotate("text", x = -0.25, y = 10, label = "NG") +
 #     annotate("text", x = -0.25, y = 5, label = "DB") +
 #     annotate("text", x = -0.25, y = 0, label = "SM") +
 #     annotate("text", x = unique(df2_rf$conc_num_factor), y = 10, label = df_stats_rf$NG) +
 #     annotate("text", x = unique(df2_rf$conc_num_factor), y = 5, label = df_stats_rf$DB) +
 #     annotate("text", x = unique(df2_rf$conc_num_factor), y = 0, label = df_stats_rf$SM) +
 #     theme_Publication()
 #   # Create K statistics scatter plot for RF data
 #   plot_rf_k_stats <- ggplot(df2_rf, aes(conc_num_factor, K)) +
 #     geom_point(position = "jitter", size = 1) +
 #     stat_summary(
 #       fun = mean,
 #       fun.min = ~ mean(.) - sd(.),
 #       fun.max = ~ mean(.) + sd(.),
 #       geom = "errorbar", color = "red"
 #     ) +
 #     stat_summary(fun = mean, geom = "point", color = "red") +
 #     scale_x_continuous(name = unique(df$Drug[1]),
 #       breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
 #     ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) +
 #     coord_cartesian(ylim = c(-20, 160)) +
 #     annotate("text", x = -0.25, y = -5, label = "NG") +
 #     annotate("text", x = -0.25, y = -12.5, label = "DB") +
 #     annotate("text", x = -0.25, y = -20, label = "SM") +
 #     annotate("text", x = unique(df2_rf$conc_num_factor), y = -5, label = df_stats_rf$NG) +
 #     annotate("text", x = unique(df2_rf$conc_num_factor), y = -12.5, label = df_stats_rf$DB) +
 #     annotate("text", x = unique(df2_rf$conc_num_factor), y = -20, label = df_stats_rf$SM) +
 #     theme_Publication()
 #   # Create r statistics scatter plot for RF data
 #   plot_rf_r_stats <- ggplot(df2_rf, aes(conc_num_factor, r)) +
 #     geom_point(position = "jitter", size = 1) +
 #     stat_summary(
 #       fun = mean,
 #       fun.min = ~ mean(.) - sd(.),
 #       fun.max = ~ mean(.) + sd(.),
 #       geom = "errorbar", color = "red"
 #     ) +
 #     stat_summary(fun = mean, geom = "point", color = "red") +
 #     scale_x_continuous(name = unique(df$Drug[1]),
 #       breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
 #     ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) +
 #     coord_cartesian(ylim = c(0, 1)) +
 #     annotate("text", x = -0.25, y = .9, label = "NG") +
 #     annotate("text", x = -0.25, y = .8, label = "DB") +
 #     annotate("text", x = -0.25, y = .7, label = "SM") +
 #     annotate("text", x = unique(df2_rf$conc_num_factor), y = .9, label = df_stats_rf$NG) +
 #     annotate("text", x = unique(df2_rf$conc_num_factor), y = .8, label = df_stats_rf$DB) +
 #     annotate("text", x = unique(df2_rf$conc_num_factor), y = .7, label = df_stats_rf$SM) +
 #     theme_Publication()
 #   # Create AUC statistics scatter plot for RF data
 #   plot_rf_auc_stats <- ggplot(df2_rf, aes(conc_num_factor, AUC)) +
 #     geom_point(position = "jitter", size = 1) +
 #     stat_summary(
 #       fun = mean,
 #       fun.min = ~ mean(.) - sd(.),
 #       fun.max = ~ mean(.) + sd(.),
 #       geom = "errorbar", color = "red"
 #     ) +
 #     stat_summary(fun = mean, geom = "point", color = "red") +
 #     scale_x_continuous(name = unique(df$Drug[1]),
 #       breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
 #     ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) +
 #     coord_cartesian(ylim = c(0, 12500)) +
 #     annotate("text", x = -0.25, y = 11000, label = "NG") +
 #     annotate("text", x = -0.25, y = 10000, label = "DB") +
 #     annotate("text", x = -0.25, y = 9000, label = "SM") +
 #     annotate("text", x = unique(df2_rf$conc_num_factor), y = 11000, label = df_stats_rf$NG) +
 #     annotate("text", x = unique(df2_rf$conc_num_factor), y = 10000, label = df_stats_rf$DB) +
 #     annotate("text", x = unique(df2_rf$conc_num_factor), y = 9000, label = df_stats_rf$SM) +
 #     theme_Publication()
 #   # Arrange and plot RF scatter plots
 #   grid.arrange(plot_rf_l_stats, plot_rf_k_stats, plot_rf_r_stats, plot_rf_auc_stats, ncol = 2, nrow = 2)
 #   # Create box plots for each RF statistic
 #   plot_rf_l_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), L)) +
 #     geom_boxplot() +
 #     scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
 #     ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) +
 #     coord_cartesian(ylim = c(0, 130)) +
 #     theme_Publication()
 #   plot_rf_k_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), K)) +
 #     geom_boxplot() +
 #     scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
 #     ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) +
 #     coord_cartesian(ylim = c(0, 160)) +
 #     theme_Publication()
 #   plot_rf_r_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), r)) +
 #     geom_boxplot() +
 #     scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
 #     ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) +
 #     coord_cartesian(ylim = c(0, 1)) +
 #     theme_Publication()
 #   plot_rf_auc_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), AUC)) +
 #     geom_boxplot() +
 #     scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
 #     ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) +
 #     coord_cartesian(ylim = c(0, 12500)) +
 #     theme_Publication()
 #   # Arrange and plot RF box plots
 #   grid.arrange(plot_rf_l_stats_box, plot_rf_k_stats_box, plot_rf_r_stats_box, plot_rf_auc_stats_box, ncol = 2, nrow = 2)
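 #   # Print the combined RF plots three list entries per page: each page is a
 #   # 4-column x 3-row grid with one row of L, K, r and AUC panels per entry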
 #   plot_indices_rf <- seq(1, (num_genes_RF - 1), by = 3)
 #   for (m in seq_along(plot_indices_rf)) {
 #     grid.arrange(
 #       p_rf_l[[plot_indices_rf[m]]], p_rf_k[[plot_indices_rf[m]]], p_rf_r[[plot_indices_rf[m]]], p_rf_auc[[plot_indices_rf[m]]],
 #       p_rf_l[[plot_indices_rf[m] + 1]], p_rf_k[[plot_indices_rf[m] + 1]],
 #       p_rf_r[[plot_indices_rf[m] + 1]], p_rf_auc[[plot_indices_rf[m] + 1]],
 #       p_rf_l[[plot_indices_rf[m] + 2]], p_rf_k[[plot_indices_rf[m] + 2]],
 #       p_rf_r[[plot_indices_rf[m] + 2]], p_rf_auc[[plot_indices_rf[m] + 2]],
 #       ncol = 4, nrow = 3
 #     )
 #   }
 #   # Handle leftover RF plots if num_genes_RF is not a multiple of 3
 #   remaining_rf_plots <- num_genes_RF - max(plot_indices_rf + 2)
 #   if (remaining_rf_plots > 0) {
 #     plot_grid_rf_list <- lapply(seq_len(remaining_rf_plots), function(i) {
 #       list(p_rf_l[[plot_indices_rf[length(plot_indices_rf)] + i]], p_rf_k[[plot_indices_rf[length(plot_indices_rf)] + i]],
 #         p_rf_r[[plot_indices_rf[length(plot_indices_rf)] + i]], p_rf_auc[[plot_indices_rf[length(plot_indices_rf)] + i]])
 #     })
 #     do.call(grid.arrange, c(plot_grid_rf_list, list(ncol = 4, nrow = 3)))
 #   }
 #   dev.off()
 # }
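 # # Fit pairwise linear models between the CPP interaction z-scores
 # # (Z_lm_L, Z_lm_K, Z_lm_r, Z_lm_AUC) in df_na_rm; the model summaries supply
 # # the R-squared values annotated on the correlation plots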
 # lm_list <- list(
 #   lm(Z_lm_K ~ Z_lm_L, data = df_na_rm),
 #   lm(Z_lm_r ~ Z_lm_L, data = df_na_rm),
 #   lm(Z_lm_AUC ~ Z_lm_L, data = df_na_rm),
 #   lm(Z_lm_r ~ Z_lm_K, data = df_na_rm),
 #   lm(Z_lm_AUC ~ Z_lm_K, data = df_na_rm),
 #   lm(Z_lm_AUC ~ Z_lm_r, data = df_na_rm)
 # )
 # lm_summaries <- lapply(lm_list, summary)
 # # Create PDF for correlation plots of CPPs
 # pdf(file.path(output_dir, "Correlation_CPPs.pdf"), width = 10, height = 7, onefile = TRUE)
 # # Generate correlation plots for each combination
 # plot_list <- list(
 #   ggplot(df_na_rm, aes(Z_lm_L, Z_lm_K)) +
 #     geom_point(shape = 3, color = "gray70") +
 #     geom_smooth(method = "lm", color = "tomato3") +
 #     ggtitle("Interaction L vs. Interaction K") +
 #     xlab("z-score L") + ylab("z-score K") +
 #     annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[1]]$r.squared, 3))) +
 #     theme_Publication_legend_right() +
 #     theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
 #       axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
 #       axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)),
 #   ggplot(df_na_rm, aes(Z_lm_L, Z_lm_r)) +
 #     geom_point(shape = 3, color = "gray70") +
 #     geom_smooth(method = "lm", color = "tomato3") +
 #     ggtitle("Interaction L vs. Interaction r") +
 #     xlab("z-score L") + ylab("z-score r") +
 #     annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[2]]$r.squared, 3))) +
 #     theme_Publication_legend_right() +
 #     theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
 #       axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
 #       axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)),
 #   ggplot(df_na_rm, aes(Z_lm_L, Z_lm_AUC)) +
 #     geom_point(shape = 3, color = "gray70") +
 #     geom_smooth(method = "lm", color = "tomato3") +
 #     ggtitle("Interaction L vs. Interaction AUC") +
 #     xlab("z-score L") + ylab("z-score AUC") +
 #     annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[3]]$r.squared, 3))) +
 #     theme_Publication_legend_right() +
 #     theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
 #       axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
 #       axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)),
 #   ggplot(df_na_rm, aes(Z_lm_K, Z_lm_r)) +
 #     geom_point(shape = 3, color = "gray70") +
 #     geom_smooth(method = "lm", color = "tomato3") +
 #     ggtitle("Interaction K vs. Interaction r") +
 #     xlab("z-score K") + ylab("z-score r") +
 #     annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[4]]$r.squared, 3))) +
 #     theme_Publication_legend_right() +
 #     theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
 #       axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
 #       axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)),
 #   ggplot(df_na_rm, aes(Z_lm_K, Z_lm_AUC)) +
 #     geom_point(shape = 3, color = "gray70") +
 #     geom_smooth(method = "lm", color = "tomato3") +
 #     ggtitle("Interaction K vs. Interaction AUC") +
 #     xlab("z-score K") + ylab("z-score AUC") +
 #     annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[5]]$r.squared, 3))) +
 #     theme_Publication_legend_right() +
 #     theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
 #       axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
 #       axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)),
 #   ggplot(df_na_rm, aes(Z_lm_r, Z_lm_AUC)) +
 #     geom_point(shape = 3, color = "gray70") +
 #     geom_smooth(method = "lm", color = "tomato3") +
 #     ggtitle("Interaction r vs. Interaction AUC") +
 #     xlab("z-score r") + ylab("z-score AUC") +
 #     annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[6]]$r.squared, 3))) +
 #     theme_Publication_legend_right() +
 #     theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
 #       axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
 #       axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18))
 # )
 # # Print all correlation plots to the PDF
 # lapply(plot_list, print)
 # # Create additional plots with the interaction_scores_rf points
 # # (rows where Z_lm_L is not NA) highlighted in cyan
 # interaction_scores_rf_filtered <- interaction_scores_rf[!is.na(interaction_scores_rf$Z_lm_L), ]
 # highlighted_plot_list <- list(
 #   ggplot(df_na_rm, aes(Z_lm_L, Z_lm_K)) +
 #     geom_point(shape = 3, color = "gray70") +
 #     geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_L, Z_lm_K), color = "cyan") +
 #     ggtitle("Interaction L vs. Interaction K") +
 #     xlab("z-score L") + ylab("z-score K") +
 #     annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[1]]$r.squared, 3))) +
 #     theme_Publication_legend_right() +
 #     theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
 #       axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
 #       axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)),
 #   ggplot(df_na_rm, aes(Z_lm_L, Z_lm_r)) +
 #     geom_point(shape = 3, color = "gray70") +
 #     geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_L, Z_lm_r), color = "cyan") +
 #     ggtitle("Interaction L vs. Interaction r") +
 #     xlab("z-score L") + ylab("z-score r") +
 #     annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[2]]$r.squared, 3))) +
 #     theme_Publication_legend_right() +
 #     theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
 #       axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
 #       axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)),
 #   ggplot(df_na_rm, aes(Z_lm_L, Z_lm_AUC)) +
 #     geom_point(shape = 3, color = "gray70") +
 #     geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_L, Z_lm_AUC), color = "cyan") +
 #     ggtitle("Interaction L vs. Interaction AUC") +
 #     xlab("z-score L") + ylab("z-score AUC") +
 #     annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[3]]$r.squared, 3))) +
 #     theme_Publication_legend_right() +
 #     theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
 #       axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
 #       axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)),
 #   ggplot(df_na_rm, aes(Z_lm_K, Z_lm_r)) +
 #     geom_point(shape = 3, color = "gray70") +
 #     geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_K, Z_lm_r), color = "cyan") +
 #     ggtitle("Interaction K vs. Interaction r") +
 #     xlab("z-score K") + ylab("z-score r") +
 #     annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[4]]$r.squared, 3))) +
 #     theme_Publication_legend_right() +
 #     theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
 #       axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
 #       axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)),
 #   ggplot(df_na_rm, aes(Z_lm_K, Z_lm_AUC)) +
 #     geom_point(shape = 3, color = "gray70") +
 #     geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_K, Z_lm_AUC), color = "cyan") +
 #     ggtitle("Interaction K vs. Interaction AUC") +
 #     xlab("z-score K") + ylab("z-score AUC") +
 #     annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[5]]$r.squared, 3))) +
 #     theme_Publication_legend_right() +
 #     theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
 #       axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
 #       axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)),
 #   ggplot(df_na_rm, aes(Z_lm_r, Z_lm_AUC)) +
 #     geom_point(shape = 3, color = "gray70") +
 #     geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_r, Z_lm_AUC), color = "cyan") +
 #     ggtitle("Interaction r vs. Interaction AUC") +
 #     xlab("z-score r") + ylab("z-score AUC") +
 #     annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[6]]$r.squared, 3))) +
 #     theme_Publication_legend_right() +
 #     theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
 #       axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
 #       axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18))
 # )
 # # Print all highlighted plots to the PDF
 # lapply(highlighted_plot_list, print)
 # dev.off()