Auto-commit: apps/r/calculate_interaction_zscores5.R

This commit is contained in:
2024-09-01 03:03:53 -04:00
parent ed1e3cf756
commit 05cd1a85d8

View File

@@ -133,13 +133,12 @@ load_and_process_data <- function(easy_results_file, sd = 3) {
DB = if_else(delta_bg >= delta_bg_tolerance, 1, 0), DB = if_else(delta_bg >= delta_bg_tolerance, 1, 0),
OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep), OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep),
conc_num = as.numeric(gsub("[^0-9\\.]", "", Conc)), conc_num = as.numeric(gsub("[^0-9\\.]", "", Conc)),
conc_num_factor = as.numeric(as.factor(conc_num)) - 1, conc_num_factor = as.numeric(as.factor(conc_num)) - 1)
max_conc = max(conc_num_factor))
return(df) return(df)
} }
# Function to update Gene names using the SGD gene list # Update Gene names using the SGD gene list
update_gene_names <- function(df, sgd_gene_list) { update_gene_names <- function(df, sgd_gene_list) {
genes <- read.delim(file = sgd_gene_list, genes <- read.delim(file = sgd_gene_list,
quote = "", header = FALSE, quote = "", header = FALSE,
@@ -220,7 +219,7 @@ calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "c
return(summary_stats) return(summary_stats)
} }
# Function to calculate L values within and outside 2SD of K # Calculate L values within and outside 2SD of K
calculate_l_2sd_of_k <- function(df, df_stats_by_k) { calculate_l_2sd_of_k <- function(df, df_stats_by_k) {
# Join the statistics to the main dataframe # Join the statistics to the main dataframe
df_joined <- df %>% df_joined <- df %>%
@@ -260,7 +259,7 @@ save_plots <- function(file_name, plot_list, output_dir) {
}) })
} }
# Function to calculate background strain mean values # Calculate background strain mean values
calculate_background_means <- function(df_stats_by_l, df_stats_by_k, df_stats_by_r, df_stats_by_auc) { calculate_background_means <- function(df_stats_by_l, df_stats_by_k, df_stats_by_r, df_stats_by_auc) {
list( list(
L = df_stats_by_l %>% filter(conc_num_factor == 0) %>% pull(mean_L), L = df_stats_by_l %>% filter(conc_num_factor == 0) %>% pull(mean_L),
@@ -270,24 +269,31 @@ calculate_background_means <- function(df_stats_by_l, df_stats_by_k, df_stats_by
) )
} }
# Function to process strains (deletion and reference) # Process strains (deletion and reference)
# Cap per-concentration L values at the theoretical maximum and flag the
# affected rows in SM.
#
# The rows of `df` are handled one concentration (distinct `conc_num`) at a
# time. For concentrations > 0, `max_L` for that concentration is looked up in
# `l_within_2sd_k` and applied in this order:
#   1. L == 0 is replaced by the maximum,
#   2. rows whose (updated) L is >= the maximum get SM = 1,
#   3. L is capped at the maximum.
# Rows with NA in L are never modified. Rows at concentration <= 0, and rows
# whose conc_num is NA, are passed through unchanged.
#
# Args:
#   df: data frame with columns conc_num, conc_num_factor, L and SM.
#       NOTE(review): conc_num_factor is assumed to be the column created in
#       load_and_process_data() - confirm it is still present at the call site.
#   l_within_2sd_k: summary table with columns conc_num_factor and max_L,
#       expected to hold exactly one row per concentration level.
#   strain: unused here; kept so existing positional callers keep working.
#   output_dir: unused here; kept so existing positional callers keep working.
#
# Returns:
#   The rows of `df`, regrouped by concentration in order of first appearance
#   in df$conc_num, with L and SM adjusted as described above.
process_strains <- function(df, l_within_2sd_k, strain, output_dir) {
  # Nothing to process: hand back the input so the column layout is preserved.
  if (nrow(df) == 0) {
    return(df)
  }

  cap_one_concentration <- function(concentration) {
    if (is.na(concentration)) {
      # `conc_num == NA` never matches, so pick these rows up explicitly.
      return(df %>% filter(is.na(conc_num)))
    }

    df_temp <- df %>% filter(conc_num == concentration)
    if (concentration <= 0) {
      return(df_temp)
    }

    # The stats table is keyed by conc_num_factor (the 0-based level index),
    # not by the raw concentration, so translate before looking up. Comparing
    # the raw concentration directly only works when concentrations happen to
    # be 0, 1, 2, ...
    conc_factor <- unique(df_temp$conc_num_factor)
    max_l_theoretical <- l_within_2sd_k %>%
      filter(conc_num_factor %in% conc_factor) %>%
      pull(max_L)

    # ifelse()/mutate() would either fail with an obscure size error or
    # silently write NA into L if the lookup is not a single usable value.
    if (length(max_l_theoretical) != 1 || is.na(max_l_theoretical)) {
      stop(
        sprintf(
          "Expected exactly one non-missing max_L for concentration %s, found %d value(s)",
          format(concentration), length(max_l_theoretical)
        ),
        call. = FALSE
      )
    }

    # mutate() evaluates these in order: SM and the final cap both see the
    # L produced by the previous step.
    df_temp %>%
      mutate(
        L = ifelse(L == 0 & !is.na(L), max_l_theoretical, L),
        SM = ifelse(L >= max_l_theoretical & !is.na(L), 1, SM),
        L = ifelse(L >= max_l_theoretical & !is.na(L), max_l_theoretical, L)
      )
  }

  # Build every slice first and bind once instead of growing a data frame
  # inside the loop.
  bind_rows(lapply(unique(df$conc_num), cap_one_concentration))
}
calculate_interaction_scores <- function(df, df_stats_by_l, df_stats_by_k, calculate_interaction_scores <- function(df, df_stats_by_l, df_stats_by_k,
df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) { df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) {
@@ -499,7 +505,6 @@ create_ranked_plots <- function(df, output_dir) {
} }
main <- function() { main <- function() {
# Applying to all experiments # Applying to all experiments
lapply(names(args$experiments), function(exp_name) { lapply(names(args$experiments), function(exp_name) {
@@ -515,6 +520,8 @@ main <- function() {
df <- load_and_process_data(args$easy_results_file, exp_sd) df <- load_and_process_data(args$easy_results_file, exp_sd)
df <- update_gene_names(df, args$sgd_gene_list) df <- update_gene_names(df, args$sgd_gene_list)
max_conc <- max(df$conc_num_factor)
# QC # QC
# Filter the df above sd tolerance # Filter the df above sd tolerance
df_above_tolerance <- df %>% filter(DB == 1) df_above_tolerance <- df %>% filter(DB == 1)
@@ -569,10 +576,10 @@ main <- function() {
# Recalculate summary statistics for the background strain # Recalculate summary statistics for the background strain
stats_background <- calculate_summary_stats(df_background, variables, group_vars = c("OrfRep", "Gene", "conc_num", "conc_num_factor")) stats_background <- calculate_summary_stats(df_background, variables, group_vars = c("OrfRep", "Gene", "conc_num", "conc_num_factor"))
stats_by_l_background <- df_stats_background %>% select(starts_with("L_"), "OrfRep", "conc_num", "conc_num_factor") stats_by_l_background <- stats_background %>% select(starts_with("L_"), "OrfRep", "conc_num", "conc_num_factor")
stats_by_k_background <- df_stats_background %>% select(starts_with("K_"), "OrfRep", "conc_num", "conc_num_factor") stats_by_k_background <- stats_background %>% select(starts_with("K_"), "OrfRep", "conc_num", "conc_num_factor")
stats_by_r_background <- df_stats_background %>% select(starts_with("r_"), "OrfRep", "conc_num", "conc_num_factor") stats_by_r_background <- stats_background %>% select(starts_with("r_"), "OrfRep", "conc_num", "conc_num_factor")
stats_by_auc_background <- df_stats_background %>% select(starts_with("AUC_"), "OrfRep", "conc_num", "conc_num_factor") stats_by_auc_background <- stats_background %>% select(starts_with("AUC_"), "OrfRep", "conc_num", "conc_num_factor")
# Backup in case previous block explodes # Backup in case previous block explodes
# Combine all summary statistics into one dataframe # Combine all summary statistics into one dataframe
@@ -617,8 +624,8 @@ main <- function() {
filter(OrfRep != strain) %>% filter(OrfRep != strain) %>%
mutate(SM = 0) mutate(SM = 0)
df_reference_strains <- process_strains(df_reference, stats_by_l_within_2sd_k, strain, out_dir) df_reference_strains <- process_strains(df_reference, l_within_2sd_k, strain, out_dir)
df_deletion_strains <- process_strains(df_deletion, stats_by_l_within_2sd_k, strain, out_dir) df_deletion_strains <- process_strains(df_deletion, l_within_2sd_k, strain, out_dir)
variables <- c("L", "K", "r", "AUC") variables <- c("L", "K", "r", "AUC")
@@ -626,17 +633,16 @@ main <- function() {
# Change OrfRep to include the reference strain, gene, and Num so each RF gets its own score # Change OrfRep to include the reference strain, gene, and Num so each RF gets its own score
# df_reference_strains <- df_reference_strains %>% # df_reference_strains <- df_reference_strains %>%
# mutate(OrfRep = paste(OrfRep, Gene, num, sep = "_")) # mutate(OrfRep = paste(OrfRep, Gene, num, sep = "_"))
# We are leaving OrfRep unchanged and using group_by(OrfRep, Gene, num) by default
# This is synonymous with the legacy OrfRep mutation # This is synonymous with the legacy OrfRep mutation
# Use group_by in functions in lieu of mutating OrfRep
# default_group_vars <- c("OrfRep", "Gene", "num") # default_group_vars <- c("OrfRep", "Gene", "num")
# Use group_by in functions in lieu of mutating OrfRep reference_results <- calculate_interaction_scores(df_reference_strains, stats_by_l,
# We are leaving OrfRep unchanged and using group_by(OrfRep, Gene, num) by default stats_by_k, stats_by_r, stats_by_auc, background_means, max_conc, variables)
reference_results <- calculate_interaction_scores(df_reference, stats_by_l, stats_by_k, stats_by_r, stats_by_auc,
background_means, max_conc, variables)
deletion_results <- calculate_interaction_scores(df_deletion, stats_by_l, stats_by_k, stats_by_r, stats_by_auc, deletion_results <- calculate_interaction_scores(df_deletion_strains, stats_by_l,
background_means, max_conc, variables) stats_by_k, stats_by_r, stats_by_auc, background_means, max_conc, variables)
zscores_calculations_reference <- reference_results$zscores_calculations zscores_calculations_reference <- reference_results$zscores_calculations
zscores_interactions_reference <- reference_results$zscores_interactions zscores_interactions_reference <- reference_results$zscores_interactions
@@ -726,706 +732,3 @@ main <- function() {
}) })
} }
main() main()
# NEEDS REFACTORING
# for (s in background_strains) {
# Initialize empty plots (placeholder for future plotting)
# p_l <- ggplot()
# p_k <- ggplot()
# p_r <- ggplot()
# p_auc <- ggplot()
# p_rf_l <- ggplot()
# p_rf_k <- ggplot()
# p_rf_r <- ggplot()
# p_rf_auc <- ggplot()
# # Generate ggplot objects for each RF strain
# for (i in seq_len(num_genes_reference)) {
# gene_sel <- unique(interaction_scores_reference$OrfRep)[i]
# df_z_calculations <- df_stats_interaction_all_RF %>% filter(OrfRep == gene_sel)
# df_int_scores <- interaction_scores_RF %>% filter(OrfRep == gene_sel)
# p_rf_l[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_L)) +
# geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
# coord_cartesian(ylim = c(-65, 65)) +
# geom_errorbar(aes(ymin = 0 - (2 * WT_sd_l), ymax = 0 + (2 * WT_sd_l)), alpha = 0.3) +
# ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) +
# annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$Z_Shift_L, 2))) +
# annotate("text", x = 1, y = 25, label = paste("lm Zscore =", round(df_int_scores$Z_lm_L, 2))) +
# annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) +
# annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) +
# annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) +
# scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor),
# labels = unique(as.character(df_z_calculations$conc_num))) +
# scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) +
# theme_publication()
# p_rf_k[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_K)) +
# geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
# coord_cartesian(ylim = c(-65, 65)) +
# geom_errorbar(aes(ymin = 0 - (2 * WT_sd_K), ymax = 0 + (2 * WT_sd_K)), alpha = 0.3) +
# ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) +
# annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$Z_Shift_K, 2))) +
# annotate("text", x = 1, y = 25, label = paste("lm Zscore =", round(df_int_scores$Z_lm_K, 2))) +
# annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) +
# annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) +
# annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) +
# scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor),
# labels = unique(as.character(df_z_calculations$conc_num))) +
# scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) +
# theme_publication()
# p_rf_r[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_r)) +
# geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
# coord_cartesian(ylim = c(-0.65, 0.65)) +
# geom_errorbar(aes(ymin = 0 - (2 * WT_sd_r), ymax = 0 + (2 * WT_sd_r)), alpha = 0.3) +
# ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) +
# annotate("text", x = 1, y = 0.45, label = paste("ZShift =", round(df_int_scores$Z_Shift_r, 2))) +
# annotate("text", x = 1, y = 0.25, label = paste("lm Zscore =", round(df_int_scores$Z_lm_r, 2))) +
# annotate("text", x = 1, y = -0.25, label = paste("NG =", df_int_scores$NG)) +
# annotate("text", x = 1, y = -0.35, label = paste("DB =", df_int_scores$DB)) +
# annotate("text", x = 1, y = -0.45, label = paste("SM =", df_int_scores$SM)) +
# scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor),
# labels = unique(as.character(df_z_calculations$conc_num))) +
# scale_y_continuous(breaks = c(-0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6)) +
# theme_publication()
# p_rf_auc[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_AUC)) +
# geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
# coord_cartesian(ylim = c(-6500, 6500)) +
# geom_errorbar(aes(ymin = 0 - (2 * WT_sd_AUC), ymax = 0 + (2 * WT_sd_AUC)), alpha = 0.3) +
# ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) +
# annotate("text", x = 1, y = 4500, label = paste("ZShift =", round(df_int_scores$Z_Shift_AUC, 2))) +
# annotate("text", x = 1, y = 2500, label = paste("lm Zscore =", round(df_int_scores$Z_lm_AUC, 2))) +
# annotate("text", x = 1, y = -2500, label = paste("NG =", df_int_scores$NG)) +
# annotate("text", x = 1, y = -3500, label = paste("DB =", df_int_scores$DB)) +
# annotate("text", x = 1, y = -4500, label = paste("SM =", df_int_scores$SM)) +
# scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor),
# labels = unique(as.character(df_z_calculations$conc_num))) +
# scale_y_continuous(breaks = c(-6000, -5000, -4000, -3000, -2000, -1000, 0, 1000, 2000, 3000, 4000, 5000, 6000)) +
# theme_publication()
# # Loop through each gene to generate plots
# for (i in 1:num_genes) {
# gene_sel <- unique(interaction_scores_deletion$OrfRep)[i]
# df_z_calculations <- df_stats_interaction_all %>% filter(OrfRep == gene_sel)
# df_int_scores <- interaction_scores_deletion %>% filter(OrfRep == gene_sel)
# p_l[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_L)) +
# geom_point() +
# geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
# coord_cartesian(ylim = c(-65, 65)) +
# geom_errorbar(aes(ymin = 0 - (2 * WT_sd_l), ymax = 0 + (2 * WT_sd_l)), alpha = 0.3) +
# ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) +
# annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$Z_Shift_L, 2))) +
# annotate("text", x = 1, y = 25, label = paste("Z lm Score =", round(df_int_scores$Z_lm_L, 2))) +
# annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) +
# annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) +
# annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) +
# scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor),
# labels = unique(as.character(df_z_calculations$conc_num))) +
# scale_y_continuous(breaks = seq(-60, 60, 10)) +
# theme_Publication()
# p_k[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_K)) +
# geom_point() +
# geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
# coord_cartesian(ylim = c(-65, 65)) +
# geom_errorbar(aes(ymin = 0 - (2 * WT_sd_K), ymax = 0 + (2 * WT_sd_K)), alpha = 0.3) +
# ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) +
# annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$Z_Shift_K, 2))) +
# annotate("text", x = 1, y = 25, label = paste("Z lm Score =", round(df_int_scores$Z_lm_K, 2))) +
# annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) +
# annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) +
# annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) +
# scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor),
# labels = unique(as.character(df_z_calculations$conc_num))) +
# scale_y_continuous(breaks = seq(-60, 60, 10)) +
# theme_Publication()
# p_r[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_r)) +
# geom_point() +
# geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
# coord_cartesian(ylim = c(-0.65, 0.65)) +
# geom_errorbar(aes(ymin = 0 - (2 * WT_sd_r), ymax = 0 + (2 * WT_sd_r)), alpha = 0.3) +
# ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) +
# annotate("text", x = 1, y = 0.45, label = paste("ZShift =", round(df_int_scores$Z_Shift_r, 2))) +
# annotate("text", x = 1, y = 0.25, label = paste("Z lm Score =", round(df_int_scores$Z_lm_r, 2))) +
# annotate("text", x = 1, y = -0.25, label = paste("NG =", df_int_scores$NG)) +
# annotate("text", x = 1, y = -0.35, label = paste("DB =", df_int_scores$DB)) +
# annotate("text", x = 1, y = -0.45, label = paste("SM =", df_int_scores$SM)) +
# scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor),
# labels = unique(as.character(df_z_calculations$conc_num))) +
# scale_y_continuous(breaks = seq(-0.6, 0.6, 0.2)) +
# theme_Publication()
# p_auc[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_AUC)) +
# geom_point() +
# geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
# coord_cartesian(ylim = c(-6500, 6500)) +
# geom_errorbar(aes(ymin = 0 - (2 * WT_sd_AUC), ymax = 0 + (2 * WT_sd_AUC)), alpha = 0.3) +
# ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) +
# annotate("text", x = 1, y = 4500, label = paste("ZShift =", round(df_int_scores$Z_Shift_AUC, 2))) +
# annotate("text", x = 1, y = 2500, label = paste("Z lm Score =", round(df_int_scores$Z_lm_AUC, 2))) +
# annotate("text", x = 1, y = -2500, label = paste("NG =", df_int_scores$NG)) +
# annotate("text", x = 1, y = -3500, label = paste("DB =", df_int_scores$DB)) +
# annotate("text", x = 1, y = -4500, label = paste("SM =", df_int_scores$SM)) +
# scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor),
# labels = unique(as.character(df_z_calculations$conc_num))) +
# scale_y_continuous(breaks = seq(-6000, 6000, 1000)) +
# theme_Publication()
# if (i == 1) {
# df_stats_interaction_all_final <- df_z_calculations
# } else {
# df_stats_interaction_all_final <- bind_rows(df_stats_interaction_all_final, df_z_calculations)
# }
# }
# print("Pass Int ggplot loop")
# write.csv(df_stats_interaction_all_final, file = file.path(output_dir, "ZScore_Calculations.csv"), row.names = FALSE)
# # Generate a blank plot for alignment purposes
# blank_plot <- ggplot(df2_rf) + geom_blank()
# # Create PDF for interaction plots
# pdf(file.path(output_dir, "InteractionPlots.pdf"), width = 16, height = 16, onefile = TRUE)
# # Summarize stats for X2_RF
# df_stats_rf <- df2_rf %>%
# group_by(conc_num, conc_num_factor) %>%
# summarise(
# mean_L = mean(L, na.rm = TRUE),
# median_L = median(L, na.rm = TRUE),
# max_L = max(L, na.rm = TRUE),
# min_L = min(L, na.rm = TRUE),
# sd_L = sd(L, na.rm = TRUE),
# mean_K = mean(K, na.rm = TRUE),
# median_K = median(K, na.rm = TRUE),
# max_K = max(K, na.rm = TRUE),
# min_K = min(K, na.rm = TRUE),
# sd_K = sd(K, na.rm = TRUE),
# mean_r = mean(r, na.rm = TRUE),
# median_r = median(r, na.rm = TRUE),
# max_r = max(r, na.rm = TRUE),
# min_r = min(r, na.rm = TRUE),
# sd_r = sd(r, na.rm = TRUE),
# mean_AUC = mean(AUC, na.rm = TRUE),
# median_AUC = median(AUC, na.rm = TRUE),
# max_AUC = max(AUC, na.rm = TRUE),
# min_AUC = min(AUC, na.rm = TRUE),
# sd_AUC = sd(AUC, na.rm = TRUE),
# NG = sum(NG, na.rm = TRUE),
# DB = sum(DB, na.rm = TRUE),
# SM = sum(SM, na.rm = TRUE)
# )
# # Create L statistics scatter plot
# plot_l_stats <- ggplot(df2_rf, aes(conc_num_factor, L)) +
# geom_point(position = "jitter", size = 1) +
# stat_summary(
# fun = mean,
# fun.min = ~ mean(.) - sd(.),
# fun.max = ~ mean(.) + sd(.),
# geom = "errorbar", color = "red"
# ) +
# stat_summary(fun = mean, geom = "point", color = "red") +
# scale_x_continuous(name = unique(df$Drug[1]),
# breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
# ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) +
# coord_cartesian(ylim = c(0, 160)) +
# annotate("text", x = -0.25, y = 10, label = "NG") +
# annotate("text", x = -0.25, y = 5, label = "DB") +
# annotate("text", x = -0.25, y = 0, label = "SM") +
# annotate("text", x = unique(df2_rf$conc_num_factor), y = 10, label = df_stats_rf$NG) +
# annotate("text", x = unique(df2_rf$conc_num_factor), y = 5, label = df_stats_rf$DB) +
# annotate("text", x = unique(df2_rf$conc_num_factor), y = 0, label = df_stats_rf$SM) +
# theme_Publication()
# # Create K statistics scatter plot
# plot_k_stats <- ggplot(df2_rf, aes(conc_num_factor, K)) +
# geom_point(position = "jitter", size = 1) +
# stat_summary(
# fun = mean,
# fun.min = ~ mean(.) - sd(.),
# fun.max = ~ mean(.) + sd(.),
# geom = "errorbar", color = "red"
# ) +
# stat_summary(fun = mean, geom = "point", color = "red") +
# scale_x_continuous(name = unique(df$Drug[1]),
# breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
# ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) +
# coord_cartesian(ylim = c(-20, 160)) +
# annotate("text", x = -0.25, y = -5, label = "NG") +
# annotate("text", x = -0.25, y = -12.5, label = "DB") +
# annotate("text", x = -0.25, y = -20, label = "SM") +
# annotate("text", x = unique(df2_rf$conc_num_factor), y = -5, label = df_stats_rf$NG) +
# annotate("text", x = unique(df2_rf$conc_num_factor), y = -12.5, label = df_stats_rf$DB) +
# annotate("text", x = unique(df2_rf$conc_num_factor), y = -20, label = df_stats_rf$SM) +
# theme_Publication()
# # Create r statistics scatter plot
# plot_r_stats <- ggplot(df2_rf, aes(conc_num_factor, r)) +
# geom_point(position = "jitter", size = 1) +
# stat_summary(
# fun = mean,
# fun.min = ~ mean(.) - sd(.),
# fun.max = ~ mean(.) + sd(.),
# geom = "errorbar", color = "red"
# ) +
# stat_summary(fun = mean, geom = "point", color = "red") +
# scale_x_continuous(name = unique(df$Drug[1]),
# breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
# ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) +
# coord_cartesian(ylim = c(0, 1)) +
# annotate("text", x = -0.25, y = .9, label = "NG") +
# annotate("text", x = -0.25, y = .8, label = "DB") +
# annotate("text", x = -0.25, y = .7, label = "SM") +
# annotate("text", x = unique(df2_rf$conc_num_factor), y = .9, label = df_stats_rf$NG) +
# annotate("text", x = unique(df2_rf$conc_num_factor), y = .8, label = df_stats_rf$DB) +
# annotate("text", x = unique(df2_rf$conc_num_factor), y = .7, label = df_stats_rf$SM) +
# theme_Publication()
# # Create AUC statistics scatter plot
# plot_auc_stats <- ggplot(df2_rf, aes(conc_num_factor, AUC)) +
# geom_point(position = "jitter", size = 1) +
# stat_summary(
# fun = mean,
# fun.min = ~ mean(.) - sd(.),
# fun.max = ~ mean(.) + sd(.),
# geom = "errorbar", color = "red"
# ) +
# stat_summary(fun = mean, geom = "point", color = "red") +
# scale_x_continuous(name = unique(df$Drug[1]),
# breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
# ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) +
# coord_cartesian(ylim = c(0, 12500)) +
# annotate("text", x = -0.25, y = 11000, label = "NG") +
# annotate("text", x = -0.25, y = 10000, label = "DB") +
# annotate("text", x = -0.25, y = 9000, label = "SM") +
# annotate("text", x = unique(df2_rf$conc_num_factor), y = 11000, label = df_stats_rf$NG) +
# annotate("text", x = unique(df2_rf$conc_num_factor), y = 10000, label = df_stats_rf$DB) +
# annotate("text", x = unique(df2_rf$conc_num_factor), y = 9000, label = df_stats_rf$SM) +
# theme_Publication()
# # Arrange and plot scatter plots
# grid.arrange(plot_l_stats, plot_k_stats, plot_r_stats, plot_auc_stats, ncol = 2, nrow = 2)
# # Create box plots for each statistic
# plot_l_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), L)) +
# geom_boxplot() +
# scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
# ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) +
# coord_cartesian(ylim = c(0, 160)) +
# theme_Publication()
# plot_k_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), K)) +
# geom_boxplot() +
# scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
# ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) +
# coord_cartesian(ylim = c(0, 130)) +
# theme_Publication()
# plot_r_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), r)) +
# geom_boxplot() +
# scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
# ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) +
# coord_cartesian(ylim = c(0, 1)) +
# theme_Publication()
# plot_auc_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), AUC)) +
# geom_boxplot() +
# scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
# ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) +
# coord_cartesian(ylim = c(0, 12500)) +
# theme_Publication()
# # Arrange and plot box plots
# grid.arrange(plot_l_stats_box, plot_k_stats_box, plot_r_stats_box, plot_auc_stats_box, ncol = 2, nrow = 2)
# # Loop to arrange and print combined plots
# plot_indices <- seq(1, (num_genes - 1), by = 3)
# for (m in seq_along(plot_indices)) {
# grid.arrange(
# p_l[[plot_indices[m]]], p_k[[plot_indices[m]]], p_r[[plot_indices[m]]], p_auc[[plot_indices[m]]],
# p_l[[plot_indices[m] + 1]], p_k[[plot_indices[m] + 1]], p_r[[plot_indices[m] + 1]], p_auc[[plot_indices[m] + 1]],
# p_l[[plot_indices[m] + 2]], p_k[[plot_indices[m] + 2]], p_r[[plot_indices[m] + 2]], p_auc[[plot_indices[m] + 2]],
# ncol = 4, nrow = 3
# )
# }
# # Handle leftover plots if num_genes is not a multiple of 3
# remaining_plots <- num_genes - max(plot_indices + 2)
# if (remaining_plots > 0) {
# plot_grid_list <- lapply(seq_len(remaining_plots), function(i) {
# list(p_l[[plot_indices[length(plot_indices)] + i]],
# p_k[[plot_indices[length(plot_indices)] + i]],
# p_r[[plot_indices[length(plot_indices)] + i]],
# p_auc[[plot_indices[length(plot_indices)] + i]])
# })
# do.call(grid.arrange, c(plot_grid_list, list(ncol = 4, nrow = 3)))
# }
# dev.off()
# # Additional PDF output for RF interaction plots
# # Generate PDF for RF interaction plots
# pdf(file.path(output_dir, "RF_InteractionPlots.pdf"), width = 16, height = 16, onefile = TRUE)
# # Summarize stats for RF data
# df_stats_rf <- df2_rf %>%
# group_by(conc_num, conc_num_factor) %>%
# summarise(
# mean_L = mean(L, na.rm = TRUE),
# median_L = median(L, na.rm = TRUE),
# max_L = max(L, na.rm = TRUE),
# min_L = min(L, na.rm = TRUE),
# sd_L = sd(L, na.rm = TRUE),
# mean_K = mean(K, na.rm = TRUE),
# median_K = median(K, na.rm = TRUE),
# max_K = max(K, na.rm = TRUE),
# min_K = min(K, na.rm = TRUE),
# sd_K = sd(K, na.rm = TRUE),
# mean_r = mean(r, na.rm = TRUE),
# median_r = median(r, na.rm = TRUE),
# max_r = max(r, na.rm = TRUE),
# min_r = min(r, na.rm = TRUE),
# sd_r = sd(r, na.rm = TRUE),
# mean_AUC = mean(AUC, na.rm = TRUE),
# median_AUC = median(AUC, na.rm = TRUE),
# max_AUC = max(AUC, na.rm = TRUE),
# min_AUC = min(AUC, na.rm = TRUE),
# sd_AUC = sd(AUC, na.rm = TRUE),
# NG = sum(NG, na.rm = TRUE),
# DB = sum(DB, na.rm = TRUE),
# SM = sum(SM, na.rm = TRUE)
# )
# # Create L statistics scatter plot for RF data
# plot_rf_l_stats <- ggplot(df2_rf, aes(conc_num_factor, L)) +
# geom_point(position = "jitter", size = 1) +
# stat_summary(
# fun = mean,
# fun.min = ~ mean(.) - sd(.),
# fun.max = ~ mean(.) + sd(.),
# geom = "errorbar", color = "red"
# ) +
# stat_summary(fun = mean, geom = "point", color = "red") +
# scale_x_continuous(name = unique(df$Drug[1]),
# breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
# ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) +
# coord_cartesian(ylim = c(0, 130)) +
# annotate("text", x = -0.25, y = 10, label = "NG") +
# annotate("text", x = -0.25, y = 5, label = "DB") +
# annotate("text", x = -0.25, y = 0, label = "SM") +
# annotate("text", x = unique(df2_rf$conc_num_factor), y = 10, label = df_stats_rf$NG) +
# annotate("text", x = unique(df2_rf$conc_num_factor), y = 5, label = df_stats_rf$DB) +
# annotate("text", x = unique(df2_rf$conc_num_factor), y = 0, label = df_stats_rf$SM) +
# theme_Publication()
# # Create K statistics scatter plot for RF data
# plot_rf_k_stats <- ggplot(df2_rf, aes(conc_num_factor, K)) +
# geom_point(position = "jitter", size = 1) +
# stat_summary(
# fun = mean,
# fun.min = ~ mean(.) - sd(.),
# fun.max = ~ mean(.) + sd(.),
# geom = "errorbar", color = "red"
# ) +
# stat_summary(fun = mean, geom = "point", color = "red") +
# scale_x_continuous(name = unique(df$Drug[1]),
# breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
# ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) +
# coord_cartesian(ylim = c(-20, 160)) +
# annotate("text", x = -0.25, y = -5, label = "NG") +
# annotate("text", x = -0.25, y = -12.5, label = "DB") +
# annotate("text", x = -0.25, y = -20, label = "SM") +
# annotate("text", x = unique(df2_rf$conc_num_factor), y = -5, label = df_stats_rf$NG) +
# annotate("text", x = unique(df2_rf$conc_num_factor), y = -12.5, label = df_stats_rf$DB) +
# annotate("text", x = unique(df2_rf$conc_num_factor), y = -20, label = df_stats_rf$SM) +
# theme_Publication()
# # Create r statistics scatter plot for RF data
# plot_rf_r_stats <- ggplot(df2_rf, aes(conc_num_factor, r)) +
# geom_point(position = "jitter", size = 1) +
# stat_summary(
# fun = mean,
# fun.min = ~ mean(.) - sd(.),
# fun.max = ~ mean(.) + sd(.),
# geom = "errorbar", color = "red"
# ) +
# stat_summary(fun = mean, geom = "point", color = "red") +
# scale_x_continuous(name = unique(df$Drug[1]),
# breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
# ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) +
# coord_cartesian(ylim = c(0, 1)) +
# annotate("text", x = -0.25, y = .9, label = "NG") +
# annotate("text", x = -0.25, y = .8, label = "DB") +
# annotate("text", x = -0.25, y = .7, label = "SM") +
# annotate("text", x = unique(df2_rf$conc_num_factor), y = .9, label = df_stats_rf$NG) +
# annotate("text", x = unique(df2_rf$conc_num_factor), y = .8, label = df_stats_rf$DB) +
# annotate("text", x = unique(df2_rf$conc_num_factor), y = .7, label = df_stats_rf$SM) +
# theme_Publication()
# # Create AUC statistics scatter plot for RF data
# plot_rf_auc_stats <- ggplot(df2_rf, aes(conc_num_factor, AUC)) +
# geom_point(position = "jitter", size = 1) +
# stat_summary(
# fun = mean,
# fun.min = ~ mean(.) - sd(.),
# fun.max = ~ mean(.) + sd(.),
# geom = "errorbar", color = "red"
# ) +
# stat_summary(fun = mean, geom = "point", color = "red") +
# scale_x_continuous(name = unique(df$Drug[1]),
# breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
# ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) +
# coord_cartesian(ylim = c(0, 12500)) +
# annotate("text", x = -0.25, y = 11000, label = "NG") +
# annotate("text", x = -0.25, y = 10000, label = "DB") +
# annotate("text", x = -0.25, y = 9000, label = "SM") +
# annotate("text", x = unique(df2_rf$conc_num_factor), y = 11000, label = df_stats_rf$NG) +
# annotate("text", x = unique(df2_rf$conc_num_factor), y = 10000, label = df_stats_rf$DB) +
# annotate("text", x = unique(df2_rf$conc_num_factor), y = 9000, label = df_stats_rf$SM) +
# theme_Publication()
# # Arrange and plot RF scatter plots
# grid.arrange(plot_rf_l_stats, plot_rf_k_stats, plot_rf_r_stats, plot_rf_auc_stats, ncol = 2, nrow = 2)
# # Create box plots for each RF statistic
# plot_rf_l_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), L)) +
# geom_boxplot() +
# scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
# ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) +
# coord_cartesian(ylim = c(0, 130)) +
# theme_Publication()
# plot_rf_k_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), K)) +
# geom_boxplot() +
# scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
# ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) +
# coord_cartesian(ylim = c(0, 160)) +
# theme_Publication()
# plot_rf_r_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), r)) +
# geom_boxplot() +
# scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
# ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) +
# coord_cartesian(ylim = c(0, 1)) +
# theme_Publication()
# plot_rf_auc_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), AUC)) +
# geom_boxplot() +
# scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
# ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) +
# coord_cartesian(ylim = c(0, 12500)) +
# theme_Publication()
# # Arrange and plot RF box plots
# grid.arrange(plot_rf_l_stats_box, plot_rf_k_stats_box, plot_rf_r_stats_box, plot_rf_auc_stats_box, ncol = 2, nrow = 2)
# # Loop to arrange and print combined RF plots
# plot_indices_rf <- seq(1, (num_genes_RF - 1), by = 3)
# for (m in seq_along(plot_indices_rf)) {
# grid.arrange(
# p_rf_l[[plot_indices_rf[m]]], p_rf_k[[plot_indices_rf[m]]], p_rf_r[[plot_indices_rf[m]]], p_rf_auc[[plot_indices_rf[m]]],
# p_rf_l[[plot_indices_rf[m] + 1]], p_rf_k[[plot_indices_rf[m] + 1]],
# p_rf_r[[plot_indices_rf[m] + 1]], p_rf_auc[[plot_indices_rf[m] + 1]],
# p_rf_l[[plot_indices_rf[m] + 2]], p_rf_k[[plot_indices_rf[m] + 2]],
# p_rf_r[[plot_indices_rf[m] + 2]], p_rf_auc[[plot_indices_rf[m] + 2]],
# ncol = 4, nrow = 3
# )
# }
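# # Handle leftover RF plots if num_genes_RF is not a multiple of 3
# # NOTE(review): the indexing below uses `last start index + i`, which re-selects plots
# # already drawn by the loop above (it covers start, start + 1, start + 2); the leftovers
# # appear to begin at `last start index + 2 + i` -- confirm before re-enabling this block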
# remaining_rf_plots <- num_genes_RF - max(plot_indices_rf + 2)
# if (remaining_rf_plots > 0) {
# plot_grid_rf_list <- lapply(seq_len(remaining_rf_plots), function(i) {
# list(p_rf_l[[plot_indices_rf[length(plot_indices_rf)] + i]], p_rf_k[[plot_indices_rf[length(plot_indices_rf)] + i]],
# p_rf_r[[plot_indices_rf[length(plot_indices_rf)] + i]], p_rf_auc[[plot_indices_rf[length(plot_indices_rf)] + i]])
# })
# do.call(grid.arrange, c(plot_grid_rf_list, list(ncol = 4, nrow = 3)))
# }
# dev.off()
# }
# # Calculate pairwise linear models and R-squared values between the CPP interaction z-scores (L, K, r, AUC) in df_na_rm
# lm_list <- list(
# lm(Z_lm_K ~ Z_lm_L, data = df_na_rm),
# lm(Z_lm_r ~ Z_lm_L, data = df_na_rm),
# lm(Z_lm_AUC ~ Z_lm_L, data = df_na_rm),
# lm(Z_lm_r ~ Z_lm_K, data = df_na_rm),
# lm(Z_lm_AUC ~ Z_lm_K, data = df_na_rm),
# lm(Z_lm_AUC ~ Z_lm_r, data = df_na_rm)
# )
# lm_summaries <- lapply(lm_list, summary)
# # Create PDF for correlation plots of CPPs
# pdf(file.path(output_dir, "Correlation_CPPs.pdf"), width = 10, height = 7, onefile = TRUE)
# # Generate correlation plots for each combination
# plot_list <- list(
# ggplot(df_na_rm, aes(Z_lm_L, Z_lm_K)) +
# geom_point(shape = 3, color = "gray70") +
# geom_smooth(method = "lm", color = "tomato3") +
# ggtitle("Interaction L vs. Interaction K") +
# xlab("z-score L") + ylab("z-score K") +
# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[1]]$r.squared, 3))) +
# theme_Publication_legend_right() +
# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)),
# ggplot(df_na_rm, aes(Z_lm_L, Z_lm_r)) +
# geom_point(shape = 3, color = "gray70") +
# geom_smooth(method = "lm", color = "tomato3") +
# ggtitle("Interaction L vs. Interaction r") +
# xlab("z-score L") + ylab("z-score r") +
# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[2]]$r.squared, 3))) +
# theme_Publication_legend_right() +
# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)),
# ggplot(df_na_rm, aes(Z_lm_L, Z_lm_AUC)) +
# geom_point(shape = 3, color = "gray70") +
# geom_smooth(method = "lm", color = "tomato3") +
# ggtitle("Interaction L vs. Interaction AUC") +
# xlab("z-score L") + ylab("z-score AUC") +
# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[3]]$r.squared, 3))) +
# theme_Publication_legend_right() +
# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)),
# ggplot(df_na_rm, aes(Z_lm_K, Z_lm_r)) +
# geom_point(shape = 3, color = "gray70") +
# geom_smooth(method = "lm", color = "tomato3") +
# ggtitle("Interaction K vs. Interaction r") +
# xlab("z-score K") + ylab("z-score r") +
# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[4]]$r.squared, 3))) +
# theme_Publication_legend_right() +
# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)),
# ggplot(df_na_rm, aes(Z_lm_K, Z_lm_AUC)) +
# geom_point(shape = 3, color = "gray70") +
# geom_smooth(method = "lm", color = "tomato3") +
# ggtitle("Interaction K vs. Interaction AUC") +
# xlab("z-score K") + ylab("z-score AUC") +
# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[5]]$r.squared, 3))) +
# theme_Publication_legend_right() +
# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)),
# ggplot(df_na_rm, aes(Z_lm_r, Z_lm_AUC)) +
# geom_point(shape = 3, color = "gray70") +
# geom_smooth(method = "lm", color = "tomato3") +
# ggtitle("Interaction r vs. Interaction AUC") +
# xlab("z-score r") + ylab("z-score AUC") +
# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[6]]$r.squared, 3))) +
# theme_Publication_legend_right() +
# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18))
# )
# # Print all correlation plots to the PDF
# lapply(plot_list, print)
# # Create additional plots with InteractionScores_RF highlighted in cyan
# interaction_scores_rf_filtered <- interaction_scores_rf[!is.na(interaction_scores_rf$Z_lm_L), ]
# highlighted_plot_list <- list(
# ggplot(df_na_rm, aes(Z_lm_L, Z_lm_K)) +
# geom_point(shape = 3, color = "gray70") +
# geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_L, Z_lm_K), color = "cyan") +
# ggtitle("Interaction L vs. Interaction K") +
# xlab("z-score L") + ylab("z-score K") +
# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[1]]$r.squared, 3))) +
# theme_Publication_legend_right() +
# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)),
# ggplot(df_na_rm, aes(Z_lm_L, Z_lm_r)) +
# geom_point(shape = 3, color = "gray70") +
# geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_L, Z_lm_r), color = "cyan") +
# ggtitle("Interaction L vs. Interaction r") +
# xlab("z-score L") + ylab("z-score r") +
# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[2]]$r.squared, 3))) +
# theme_Publication_legend_right() +
# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)),
# ggplot(df_na_rm, aes(Z_lm_L, Z_lm_AUC)) +
# geom_point(shape = 3, color = "gray70") +
# geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_L, Z_lm_AUC), color = "cyan") +
# ggtitle("Interaction L vs. Interaction AUC") +
# xlab("z-score L") + ylab("z-score AUC") +
# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[3]]$r.squared, 3))) +
# theme_Publication_legend_right() +
# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)),
# ggplot(df_na_rm, aes(Z_lm_K, Z_lm_r)) +
# geom_point(shape = 3, color = "gray70") +
# geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_K, Z_lm_r), color = "cyan") +
# ggtitle("Interaction K vs. Interaction r") +
# xlab("z-score K") + ylab("z-score r") +
# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[4]]$r.squared, 3))) +
# theme_Publication_legend_right() +
# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)),
# ggplot(df_na_rm, aes(Z_lm_K, Z_lm_AUC)) +
# geom_point(shape = 3, color = "gray70") +
# geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_K, Z_lm_AUC), color = "cyan") +
# ggtitle("Interaction K vs. Interaction AUC") +
# xlab("z-score K") + ylab("z-score AUC") +
# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[5]]$r.squared, 3))) +
# theme_Publication_legend_right() +
# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)),
# ggplot(df_na_rm, aes(Z_lm_r, Z_lm_AUC)) +
# geom_point(shape = 3, color = "gray70") +
# geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_r, Z_lm_AUC), color = "cyan") +
# ggtitle("Interaction r vs. Interaction AUC") +
# xlab("z-score r") + ylab("z-score AUC") +
# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[6]]$r.squared, 3))) +
# theme_Publication_legend_right() +
# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18))
# )
# # Print all highlighted plots to the PDF
# lapply(highlighted_plot_list, print)
# dev.off()