Auto-commit: apps/r/calculate_interaction_zscores5.R
This commit is contained in:
@@ -133,13 +133,12 @@ load_and_process_data <- function(easy_results_file, sd = 3) {
|
|||||||
DB = if_else(delta_bg >= delta_bg_tolerance, 1, 0),
|
DB = if_else(delta_bg >= delta_bg_tolerance, 1, 0),
|
||||||
OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep),
|
OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep),
|
||||||
conc_num = as.numeric(gsub("[^0-9\\.]", "", Conc)),
|
conc_num = as.numeric(gsub("[^0-9\\.]", "", Conc)),
|
||||||
conc_num_factor = as.numeric(as.factor(conc_num)) - 1,
|
conc_num_factor = as.numeric(as.factor(conc_num)) - 1)
|
||||||
max_conc = max(conc_num_factor))
|
|
||||||
|
|
||||||
return(df)
|
return(df)
|
||||||
}
|
}
|
||||||
|
|
||||||
# Function to update Gene names using the SGD gene list
|
# Update Gene names using the SGD gene list
|
||||||
update_gene_names <- function(df, sgd_gene_list) {
|
update_gene_names <- function(df, sgd_gene_list) {
|
||||||
genes <- read.delim(file = sgd_gene_list,
|
genes <- read.delim(file = sgd_gene_list,
|
||||||
quote = "", header = FALSE,
|
quote = "", header = FALSE,
|
||||||
@@ -220,7 +219,7 @@ calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "c
|
|||||||
return(summary_stats)
|
return(summary_stats)
|
||||||
}
|
}
|
||||||
|
|
||||||
# Function to calculate L values within and outside 2SD of K
|
# Calculate L values within and outside 2SD of K
|
||||||
calculate_l_2sd_of_k <- function(df, df_stats_by_k) {
|
calculate_l_2sd_of_k <- function(df, df_stats_by_k) {
|
||||||
# Join the statistics to the main dataframe
|
# Join the statistics to the main dataframe
|
||||||
df_joined <- df %>%
|
df_joined <- df %>%
|
||||||
@@ -260,7 +259,7 @@ save_plots <- function(file_name, plot_list, output_dir) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
# Function to calculate background strain mean values
|
# Calculate background strain mean values
|
||||||
calculate_background_means <- function(df_stats_by_l, df_stats_by_k, df_stats_by_r, df_stats_by_auc) {
|
calculate_background_means <- function(df_stats_by_l, df_stats_by_k, df_stats_by_r, df_stats_by_auc) {
|
||||||
list(
|
list(
|
||||||
L = df_stats_by_l %>% filter(conc_num_factor == 0) %>% pull(mean_L),
|
L = df_stats_by_l %>% filter(conc_num_factor == 0) %>% pull(mean_L),
|
||||||
@@ -270,24 +269,31 @@ calculate_background_means <- function(df_stats_by_l, df_stats_by_k, df_stats_by
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
# Function to process strains (deletion and reference)
|
# Process strains (deletion and reference)
|
||||||
# process_strains, shown as a side-by-side diff: each pair below is the old
# line followed by the new line; `|` and `||||||` lines are diff-view residue.
#
# Purpose: loop over every unique `conc_num` in `df`. For concentrations > 0,
# look up `max_L` from the stats table and then, in this order:
#   1. replace non-NA L == 0 with max_l_theoretical,
#   2. set SM to 1 where non-NA L >= max_l_theoretical,
#   3. cap non-NA L at max_l_theoretical.
# Rows at concentration 0 pass through unchanged. Returns `df_strains`.
#
# Arguments:
#   df              data frame with (at least) conc_num, L and SM columns.
#   l_within_2sd_k  stats table with conc_num_factor and max_L columns
#                   (named df_stats_by_l_within_2sd_k on the old side).
#   strain, output_dir  accepted but not referenced in the visible body.
#
# NOTE(review): the lookup filters `conc_num_factor == concentration`, while
# `concentration` iterates over `conc_num` values. Confirm the two columns
# share a scale; otherwise the filter may match no row (or the wrong one).
# NOTE(review): the lookup presumably yields a single max_L per concentration;
# verify against the caller, since a longer vector would recycle in ifelse().
# NOTE(review): the old side reads `max_L_theoretical`, which is never
# assigned in the visible body; the new side uses `max_l_theoretical`.
# NOTE(review): the old side overwrites `df_strains` with
# bind_rows(df, df_temp) on every iteration; the new side accumulates into
# `df_strains`, which it initialises as an empty data frame.
process_strains <- function(df, df_stats_by_l_within_2sd_k, strain, output_dir) {
|
process_strains <- function(df, l_within_2sd_k, strain, output_dir) {
|
||||||
|
df_strains <- data.frame() # Initialize an empty dataframe to store results
|
||||||
|
|
||||||
for (concentration in unique(df$conc_num)) {
|
for (concentration in unique(df$conc_num)) {
|
||||||
df_temp <- df %>% filter(conc_num == concentration)
|
df_temp <- df %>% filter(conc_num == concentration)
|
||||||
|
|
||||||
if (concentration > 0) {
|
if (concentration > 0) {
|
||||||
max_l_theoretical <- df_stats_by_l_within_2sd_k %>% filter(conc_num_factor == concentration) %>% pull(max_L)
|
max_l_theoretical <- l_within_2sd_k %>%
|
||||||
|
filter(conc_num_factor == concentration) %>%
|
||||||
|
pull(max_L)
|
||||||
df_temp <- df_temp %>%
|
df_temp <- df_temp %>%
|
||||||
mutate(
|
mutate(
|
||||||
L = ifelse(L == 0 & !is.na(L), max_L_theoretical, L),
|
L = ifelse(L == 0 & !is.na(L), max_l_theoretical, L),
|
||||||
SM = ifelse(L >= max_l_theoretical & !is.na(L), 1, SM),
|
SM = ifelse(L >= max_l_theoretical & !is.na(L), 1, SM),
|
||||||
L = ifelse(L >= max_l_theoretical & !is.na(L), max_l_theoretical, L)
|
L = ifelse(L >= max_l_theoretical & !is.na(L), max_l_theoretical, L)
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
df_strains <- bind_rows(df, df_temp)
|
df_strains <- bind_rows(df_strains, df_temp) # Append the results of this concentration
|
||||||
}
|
}
|
||||||
|
|
||||||
return(df_strains)
|
return(df_strains)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
calculate_interaction_scores <- function(df, df_stats_by_l, df_stats_by_k,
|
calculate_interaction_scores <- function(df, df_stats_by_l, df_stats_by_k,
|
||||||
df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) {
|
df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) {
|
||||||
|
|
||||||
@@ -499,7 +505,6 @@ create_ranked_plots <- function(df, output_dir) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
main <- function() {
|
main <- function() {
|
||||||
# Applying to all experiments
|
# Applying to all experiments
|
||||||
lapply(names(args$experiments), function(exp_name) {
|
lapply(names(args$experiments), function(exp_name) {
|
||||||
@@ -515,6 +520,8 @@ main <- function() {
|
|||||||
df <- load_and_process_data(args$easy_results_file, exp_sd)
|
df <- load_and_process_data(args$easy_results_file, exp_sd)
|
||||||
df <- update_gene_names(df, args$sgd_gene_list)
|
df <- update_gene_names(df, args$sgd_gene_list)
|
||||||
|
|
||||||
|
max_conc <- max(df$conc_num_factor)
|
||||||
|
|
||||||
# QC
|
# QC
|
||||||
# Filter the df above sd tolerance
|
# Filter the df above sd tolerance
|
||||||
df_above_tolerance <- df %>% filter(DB == 1)
|
df_above_tolerance <- df %>% filter(DB == 1)
|
||||||
@@ -569,10 +576,10 @@ main <- function() {
|
|||||||
|
|
||||||
# Recalculate summary statistics for the background strain
|
# Recalculate summary statistics for the background strain
|
||||||
stats_background <- calculate_summary_stats(df_background, variables, group_vars = c("OrfRep", "Gene", "conc_num", "conc_num_factor"))
|
stats_background <- calculate_summary_stats(df_background, variables, group_vars = c("OrfRep", "Gene", "conc_num", "conc_num_factor"))
|
||||||
stats_by_l_background <- df_stats_background %>% select(starts_with("L_"), "OrfRep", "conc_num", "conc_num_factor")
|
stats_by_l_background <- stats_background %>% select(starts_with("L_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||||
stats_by_k_background <- df_stats_background %>% select(starts_with("K_"), "OrfRep", "conc_num", "conc_num_factor")
|
stats_by_k_background <- stats_background %>% select(starts_with("K_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||||
stats_by_r_background <- df_stats_background %>% select(starts_with("r_"), "OrfRep", "conc_num", "conc_num_factor")
|
stats_by_r_background <- stats_background %>% select(starts_with("r_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||||
stats_by_auc_background <- df_stats_background %>% select(starts_with("AUC_"), "OrfRep", "conc_num", "conc_num_factor")
|
stats_by_auc_background <- stats_background %>% select(starts_with("AUC_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||||
|
|
||||||
# Backup in case previous block explodes
|
# Backup in case previous block explodes
|
||||||
# Combine all summary statistics into one dataframe
|
# Combine all summary statistics into one dataframe
|
||||||
@@ -617,8 +624,8 @@ main <- function() {
|
|||||||
filter(OrfRep != strain) %>%
|
filter(OrfRep != strain) %>%
|
||||||
mutate(SM = 0)
|
mutate(SM = 0)
|
||||||
|
|
||||||
df_reference_strains <- process_strains(df_reference, stats_by_l_within_2sd_k, strain, out_dir)
|
df_reference_strains <- process_strains(df_reference, l_within_2sd_k, strain, out_dir)
|
||||||
df_deletion_strains <- process_strains(df_deletion, stats_by_l_within_2sd_k, strain, out_dir)
|
df_deletion_strains <- process_strains(df_deletion, l_within_2sd_k, strain, out_dir)
|
||||||
|
|
||||||
variables <- c("L", "K", "r", "AUC")
|
variables <- c("L", "K", "r", "AUC")
|
||||||
|
|
||||||
@@ -626,17 +633,16 @@ main <- function() {
|
|||||||
# Change OrfRep to include the reference strain, gene, and Num so each RF gets its own score
|
# Change OrfRep to include the reference strain, gene, and Num so each RF gets its own score
|
||||||
# df_reference_strains <- df_reference_strains %>%
|
# df_reference_strains <- df_reference_strains %>%
|
||||||
# mutate(OrfRep = paste(OrfRep, Gene, num, sep = "_"))
|
# mutate(OrfRep = paste(OrfRep, Gene, num, sep = "_"))
|
||||||
|
# We are leaving OrfRep unchanged and using group_by(OrfRep, Gene, num) by default
|
||||||
# This is synonymous with the legacy OrfRep mutation
|
# This is synonymous with the legacy OrfRep mutation
|
||||||
|
# Use group_by in functions in lieu of mutating OrfRep
|
||||||
# default_group_vars <- c("OrfRep", "Gene", "num")
|
# default_group_vars <- c("OrfRep", "Gene", "num")
|
||||||
|
|
||||||
# Use group_by in functions in lieu of mutating OrfRep
|
reference_results <- calculate_interaction_scores(df_reference_strains, stats_by_l,
|
||||||
# We are leaving OrfRep unchanged and using group_by(OrfRep, Gene, num) by default
|
stats_by_k, stats_by_r, stats_by_auc, background_means, max_conc, variables)
|
||||||
reference_results <- calculate_interaction_scores(df_reference, stats_by_l, stats_by_k, stats_by_r, stats_by_auc,
|
|
||||||
background_means, max_conc, variables)
|
|
||||||
|
|
||||||
deletion_results <- calculate_interaction_scores(df_deletion, stats_by_l, stats_by_k, stats_by_r, stats_by_auc,
|
deletion_results <- calculate_interaction_scores(df_deletion_strains, stats_by_l,
|
||||||
background_means, max_conc, variables)
|
stats_by_k, stats_by_r, stats_by_auc, background_means, max_conc, variables)
|
||||||
|
|
||||||
zscores_calculations_reference <- reference_results$zscores_calculations
|
zscores_calculations_reference <- reference_results$zscores_calculations
|
||||||
zscores_interactions_reference <- reference_results$zscores_interactions
|
zscores_interactions_reference <- reference_results$zscores_interactions
|
||||||
@@ -726,706 +732,3 @@ main <- function() {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
main()
|
main()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# NEEDS REFACTORING
|
|
||||||
# for (s in background_strains) {
|
|
||||||
|
|
||||||
# Initialize empty plots (placeholder for future plotting)
|
|
||||||
# p_l <- ggplot()
|
|
||||||
# p_k <- ggplot()
|
|
||||||
# p_r <- ggplot()
|
|
||||||
# p_auc <- ggplot()
|
|
||||||
# p_rf_l <- ggplot()
|
|
||||||
# p_rf_k <- ggplot()
|
|
||||||
# p_rf_r <- ggplot()
|
|
||||||
# p_rf_auc <- ggplot()
|
|
||||||
|
|
||||||
# # Generate ggplot objects for each RF strain
|
|
||||||
# for (i in seq_len(num_genes_reference)) {
|
|
||||||
# gene_sel <- unique(interaction_scores_reference$OrfRep)[i]
|
|
||||||
# df_z_calculations <- df_stats_interaction_all_RF %>% filter(OrfRep == gene_sel)
|
|
||||||
# df_int_scores <- interaction_scores_RF %>% filter(OrfRep == gene_sel)
|
|
||||||
|
|
||||||
# p_rf_l[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_L)) +
|
|
||||||
# geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
|
|
||||||
# coord_cartesian(ylim = c(-65, 65)) +
|
|
||||||
# geom_errorbar(aes(ymin = 0 - (2 * WT_sd_l), ymax = 0 + (2 * WT_sd_l)), alpha = 0.3) +
|
|
||||||
# ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) +
|
|
||||||
# annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$Z_Shift_L, 2))) +
|
|
||||||
# annotate("text", x = 1, y = 25, label = paste("lm Zscore =", round(df_int_scores$Z_lm_L, 2))) +
|
|
||||||
# annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) +
|
|
||||||
# annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) +
|
|
||||||
# annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) +
|
|
||||||
# scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor),
|
|
||||||
# labels = unique(as.character(df_z_calculations$conc_num))) +
|
|
||||||
# scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) +
|
|
||||||
# theme_publication()
|
|
||||||
|
|
||||||
# p_rf_k[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_K)) +
|
|
||||||
# geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
|
|
||||||
# coord_cartesian(ylim = c(-65, 65)) +
|
|
||||||
# geom_errorbar(aes(ymin = 0 - (2 * WT_sd_K), ymax = 0 + (2 * WT_sd_K)), alpha = 0.3) +
|
|
||||||
# ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) +
|
|
||||||
# annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$Z_Shift_K, 2))) +
|
|
||||||
# annotate("text", x = 1, y = 25, label = paste("lm Zscore =", round(df_int_scores$Z_lm_K, 2))) +
|
|
||||||
# annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) +
|
|
||||||
# annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) +
|
|
||||||
# annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) +
|
|
||||||
# scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor),
|
|
||||||
# labels = unique(as.character(df_z_calculations$conc_num))) +
|
|
||||||
# scale_y_continuous(breaks = c(-60, -50, -40, -30, -20, -10, 0, 10, 20, 30, 40, 50, 60)) +
|
|
||||||
# theme_publication()
|
|
||||||
|
|
||||||
# p_rf_r[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_r)) +
|
|
||||||
# geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
|
|
||||||
# coord_cartesian(ylim = c(-0.65, 0.65)) +
|
|
||||||
# geom_errorbar(aes(ymin = 0 - (2 * WT_sd_r), ymax = 0 + (2 * WT_sd_r)), alpha = 0.3) +
|
|
||||||
# ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) +
|
|
||||||
# annotate("text", x = 1, y = 0.45, label = paste("ZShift =", round(df_int_scores$Z_Shift_r, 2))) +
|
|
||||||
# annotate("text", x = 1, y = 0.25, label = paste("lm Zscore =", round(df_int_scores$Z_lm_r, 2))) +
|
|
||||||
# annotate("text", x = 1, y = -0.25, label = paste("NG =", df_int_scores$NG)) +
|
|
||||||
# annotate("text", x = 1, y = -0.35, label = paste("DB =", df_int_scores$DB)) +
|
|
||||||
# annotate("text", x = 1, y = -0.45, label = paste("SM =", df_int_scores$SM)) +
|
|
||||||
# scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor),
|
|
||||||
# labels = unique(as.character(df_z_calculations$conc_num))) +
|
|
||||||
# scale_y_continuous(breaks = c(-0.6, -0.4, -0.2, 0, 0.2, 0.4, 0.6)) +
|
|
||||||
# theme_publication()
|
|
||||||
|
|
||||||
# p_rf_auc[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_AUC)) +
|
|
||||||
# geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
|
|
||||||
# coord_cartesian(ylim = c(-6500, 6500)) +
|
|
||||||
# geom_errorbar(aes(ymin = 0 - (2 * WT_sd_AUC), ymax = 0 + (2 * WT_sd_AUC)), alpha = 0.3) +
|
|
||||||
# ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) +
|
|
||||||
# annotate("text", x = 1, y = 4500, label = paste("ZShift =", round(df_int_scores$Z_Shift_AUC, 2))) +
|
|
||||||
# annotate("text", x = 1, y = 2500, label = paste("lm Zscore =", round(df_int_scores$Z_lm_AUC, 2))) +
|
|
||||||
# annotate("text", x = 1, y = -2500, label = paste("NG =", df_int_scores$NG)) +
|
|
||||||
# annotate("text", x = 1, y = -3500, label = paste("DB =", df_int_scores$DB)) +
|
|
||||||
# annotate("text", x = 1, y = -4500, label = paste("SM =", df_int_scores$SM)) +
|
|
||||||
# scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor),
|
|
||||||
# labels = unique(as.character(df_z_calculations$conc_num))) +
|
|
||||||
# scale_y_continuous(breaks = c(-6000, -5000, -4000, -3000, -2000, -1000, 0, 1000, 2000, 3000, 4000, 5000, 6000)) +
|
|
||||||
# theme_publication()
|
|
||||||
|
|
||||||
# # Loop through each gene to generate plots
|
|
||||||
# for (i in 1:num_genes) {
|
|
||||||
# gene_sel <- unique(interaction_scores_deletion$OrfRep)[i]
|
|
||||||
# df_z_calculations <- df_stats_interaction_all %>% filter(OrfRep == gene_sel)
|
|
||||||
# df_int_scores <- interaction_scores_deletion %>% filter(OrfRep == gene_sel)
|
|
||||||
|
|
||||||
# p_l[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_L)) +
|
|
||||||
# geom_point() +
|
|
||||||
# geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
|
|
||||||
# coord_cartesian(ylim = c(-65, 65)) +
|
|
||||||
# geom_errorbar(aes(ymin = 0 - (2 * WT_sd_l), ymax = 0 + (2 * WT_sd_l)), alpha = 0.3) +
|
|
||||||
# ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) +
|
|
||||||
# annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$Z_Shift_L, 2))) +
|
|
||||||
# annotate("text", x = 1, y = 25, label = paste("Z lm Score =", round(df_int_scores$Z_lm_L, 2))) +
|
|
||||||
# annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) +
|
|
||||||
# annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) +
|
|
||||||
# annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) +
|
|
||||||
# scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor),
|
|
||||||
# labels = unique(as.character(df_z_calculations$conc_num))) +
|
|
||||||
# scale_y_continuous(breaks = seq(-60, 60, 10)) +
|
|
||||||
# theme_Publication()
|
|
||||||
|
|
||||||
# p_k[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_K)) +
|
|
||||||
# geom_point() +
|
|
||||||
# geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
|
|
||||||
# coord_cartesian(ylim = c(-65, 65)) +
|
|
||||||
# geom_errorbar(aes(ymin = 0 - (2 * WT_sd_K), ymax = 0 + (2 * WT_sd_K)), alpha = 0.3) +
|
|
||||||
# ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) +
|
|
||||||
# annotate("text", x = 1, y = 45, label = paste("ZShift =", round(df_int_scores$Z_Shift_K, 2))) +
|
|
||||||
# annotate("text", x = 1, y = 25, label = paste("Z lm Score =", round(df_int_scores$Z_lm_K, 2))) +
|
|
||||||
# annotate("text", x = 1, y = -25, label = paste("NG =", df_int_scores$NG)) +
|
|
||||||
# annotate("text", x = 1, y = -35, label = paste("DB =", df_int_scores$DB)) +
|
|
||||||
# annotate("text", x = 1, y = -45, label = paste("SM =", df_int_scores$SM)) +
|
|
||||||
# scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor),
|
|
||||||
# labels = unique(as.character(df_z_calculations$conc_num))) +
|
|
||||||
# scale_y_continuous(breaks = seq(-60, 60, 10)) +
|
|
||||||
# theme_Publication()
|
|
||||||
|
|
||||||
# p_r[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_r)) +
|
|
||||||
# geom_point() +
|
|
||||||
# geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
|
|
||||||
# coord_cartesian(ylim = c(-0.65, 0.65)) +
|
|
||||||
# geom_errorbar(aes(ymin = 0 - (2 * WT_sd_r), ymax = 0 + (2 * WT_sd_r)), alpha = 0.3) +
|
|
||||||
# ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) +
|
|
||||||
# annotate("text", x = 1, y = 0.45, label = paste("ZShift =", round(df_int_scores$Z_Shift_r, 2))) +
|
|
||||||
# annotate("text", x = 1, y = 0.25, label = paste("Z lm Score =", round(df_int_scores$Z_lm_r, 2))) +
|
|
||||||
# annotate("text", x = 1, y = -0.25, label = paste("NG =", df_int_scores$NG)) +
|
|
||||||
# annotate("text", x = 1, y = -0.35, label = paste("DB =", df_int_scores$DB)) +
|
|
||||||
# annotate("text", x = 1, y = -0.45, label = paste("SM =", df_int_scores$SM)) +
|
|
||||||
# scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor),
|
|
||||||
# labels = unique(as.character(df_z_calculations$conc_num))) +
|
|
||||||
# scale_y_continuous(breaks = seq(-0.6, 0.6, 0.2)) +
|
|
||||||
# theme_Publication()
|
|
||||||
|
|
||||||
# p_auc[[i]] <- ggplot(df_z_calculations, aes(conc_num_factor, Delta_AUC)) +
|
|
||||||
# geom_point() +
|
|
||||||
# geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
|
|
||||||
# coord_cartesian(ylim = c(-6500, 6500)) +
|
|
||||||
# geom_errorbar(aes(ymin = 0 - (2 * WT_sd_AUC), ymax = 0 + (2 * WT_sd_AUC)), alpha = 0.3) +
|
|
||||||
# ggtitle(paste(df_z_calculations$OrfRep[1], df_z_calculations$Gene[1], sep = " ")) +
|
|
||||||
# annotate("text", x = 1, y = 4500, label = paste("ZShift =", round(df_int_scores$Z_Shift_AUC, 2))) +
|
|
||||||
# annotate("text", x = 1, y = 2500, label = paste("Z lm Score =", round(df_int_scores$Z_lm_AUC, 2))) +
|
|
||||||
# annotate("text", x = 1, y = -2500, label = paste("NG =", df_int_scores$NG)) +
|
|
||||||
# annotate("text", x = 1, y = -3500, label = paste("DB =", df_int_scores$DB)) +
|
|
||||||
# annotate("text", x = 1, y = -4500, label = paste("SM =", df_int_scores$SM)) +
|
|
||||||
# scale_x_continuous(name = unique(df$Drug[1]), breaks = unique(df_z_calculations$conc_num_factor),
|
|
||||||
# labels = unique(as.character(df_z_calculations$conc_num))) +
|
|
||||||
# scale_y_continuous(breaks = seq(-6000, 6000, 1000)) +
|
|
||||||
# theme_Publication()
|
|
||||||
|
|
||||||
# if (i == 1) {
|
|
||||||
# df_stats_interaction_all_final <- df_z_calculations
|
|
||||||
# } else {
|
|
||||||
# df_stats_interaction_all_final <- bind_rows(df_stats_interaction_all_final, df_z_calculations)
|
|
||||||
# }
|
|
||||||
# }
|
|
||||||
|
|
||||||
# print("Pass Int ggplot loop")
|
|
||||||
# write.csv(df_stats_interaction_all_final, file = file.path(output_dir, "ZScore_Calculations.csv"), row.names = FALSE)
|
|
||||||
|
|
||||||
# # Generate a blank plot for alignment purposes
|
|
||||||
# blank_plot <- ggplot(df2_rf) + geom_blank()
|
|
||||||
|
|
||||||
# # Create PDF for interaction plots
|
|
||||||
# pdf(file.path(output_dir, "InteractionPlots.pdf"), width = 16, height = 16, onefile = TRUE)
|
|
||||||
|
|
||||||
# # Summarize stats for X2_RF
|
|
||||||
# df_stats_rf <- df2_rf %>%
|
|
||||||
# group_by(conc_num, conc_num_factor) %>%
|
|
||||||
# summarise(
|
|
||||||
# mean_L = mean(L, na.rm = TRUE),
|
|
||||||
# median_L = median(L, na.rm = TRUE),
|
|
||||||
# max_L = max(L, na.rm = TRUE),
|
|
||||||
# min_L = min(L, na.rm = TRUE),
|
|
||||||
# sd_L = sd(L, na.rm = TRUE),
|
|
||||||
# mean_K = mean(K, na.rm = TRUE),
|
|
||||||
# median_K = median(K, na.rm = TRUE),
|
|
||||||
# max_K = max(K, na.rm = TRUE),
|
|
||||||
# min_K = min(K, na.rm = TRUE),
|
|
||||||
# sd_K = sd(K, na.rm = TRUE),
|
|
||||||
# mean_r = mean(r, na.rm = TRUE),
|
|
||||||
# median_r = median(r, na.rm = TRUE),
|
|
||||||
# max_r = max(r, na.rm = TRUE),
|
|
||||||
# min_r = min(r, na.rm = TRUE),
|
|
||||||
# sd_r = sd(r, na.rm = TRUE),
|
|
||||||
# mean_AUC = mean(AUC, na.rm = TRUE),
|
|
||||||
# median_AUC = median(AUC, na.rm = TRUE),
|
|
||||||
# max_AUC = max(AUC, na.rm = TRUE),
|
|
||||||
# min_AUC = min(AUC, na.rm = TRUE),
|
|
||||||
# sd_AUC = sd(AUC, na.rm = TRUE),
|
|
||||||
# NG = sum(NG, na.rm = TRUE),
|
|
||||||
# DB = sum(DB, na.rm = TRUE),
|
|
||||||
# SM = sum(SM, na.rm = TRUE)
|
|
||||||
# )
|
|
||||||
|
|
||||||
# # Create L statistics scatter plot
|
|
||||||
# plot_l_stats <- ggplot(df2_rf, aes(conc_num_factor, L)) +
|
|
||||||
# geom_point(position = "jitter", size = 1) +
|
|
||||||
# stat_summary(
|
|
||||||
# fun = mean,
|
|
||||||
# fun.min = ~ mean(.) - sd(.),
|
|
||||||
# fun.max = ~ mean(.) + sd(.),
|
|
||||||
# geom = "errorbar", color = "red"
|
|
||||||
# ) +
|
|
||||||
# stat_summary(fun = mean, geom = "point", color = "red") +
|
|
||||||
# scale_x_continuous(name = unique(df$Drug[1]),
|
|
||||||
# breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
|
|
||||||
# ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) +
|
|
||||||
# coord_cartesian(ylim = c(0, 160)) +
|
|
||||||
# annotate("text", x = -0.25, y = 10, label = "NG") +
|
|
||||||
# annotate("text", x = -0.25, y = 5, label = "DB") +
|
|
||||||
# annotate("text", x = -0.25, y = 0, label = "SM") +
|
|
||||||
# annotate("text", x = unique(df2_rf$conc_num_factor), y = 10, label = df_stats_rf$NG) +
|
|
||||||
# annotate("text", x = unique(df2_rf$conc_num_factor), y = 5, label = df_stats_rf$DB) +
|
|
||||||
# annotate("text", x = unique(df2_rf$conc_num_factor), y = 0, label = df_stats_rf$SM) +
|
|
||||||
# theme_Publication()
|
|
||||||
|
|
||||||
# # Create K statistics scatter plot
|
|
||||||
# plot_k_stats <- ggplot(df2_rf, aes(conc_num_factor, K)) +
|
|
||||||
# geom_point(position = "jitter", size = 1) +
|
|
||||||
# stat_summary(
|
|
||||||
# fun = mean,
|
|
||||||
# fun.min = ~ mean(.) - sd(.),
|
|
||||||
# fun.max = ~ mean(.) + sd(.),
|
|
||||||
# geom = "errorbar", color = "red"
|
|
||||||
# ) +
|
|
||||||
# stat_summary(fun = mean, geom = "point", color = "red") +
|
|
||||||
# scale_x_continuous(name = unique(df$Drug[1]),
|
|
||||||
# breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
|
|
||||||
# ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) +
|
|
||||||
# coord_cartesian(ylim = c(-20, 160)) +
|
|
||||||
# annotate("text", x = -0.25, y = -5, label = "NG") +
|
|
||||||
# annotate("text", x = -0.25, y = -12.5, label = "DB") +
|
|
||||||
# annotate("text", x = -0.25, y = -20, label = "SM") +
|
|
||||||
# annotate("text", x = unique(df2_rf$conc_num_factor), y = -5, label = df_stats_rf$NG) +
|
|
||||||
# annotate("text", x = unique(df2_rf$conc_num_factor), y = -12.5, label = df_stats_rf$DB) +
|
|
||||||
# annotate("text", x = unique(df2_rf$conc_num_factor), y = -20, label = df_stats_rf$SM) +
|
|
||||||
# theme_Publication()
|
|
||||||
|
|
||||||
# # Create r statistics scatter plot
|
|
||||||
# plot_r_stats <- ggplot(df2_rf, aes(conc_num_factor, r)) +
|
|
||||||
# geom_point(position = "jitter", size = 1) +
|
|
||||||
# stat_summary(
|
|
||||||
# fun = mean,
|
|
||||||
# fun.min = ~ mean(.) - sd(.),
|
|
||||||
# fun.max = ~ mean(.) + sd(.),
|
|
||||||
# geom = "errorbar", color = "red"
|
|
||||||
# ) +
|
|
||||||
# stat_summary(fun = mean, geom = "point", color = "red") +
|
|
||||||
# scale_x_continuous(name = unique(df$Drug[1]),
|
|
||||||
# breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
|
|
||||||
# ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) +
|
|
||||||
# coord_cartesian(ylim = c(0, 1)) +
|
|
||||||
# annotate("text", x = -0.25, y = .9, label = "NG") +
|
|
||||||
# annotate("text", x = -0.25, y = .8, label = "DB") +
|
|
||||||
# annotate("text", x = -0.25, y = .7, label = "SM") +
|
|
||||||
# annotate("text", x = unique(df2_rf$conc_num_factor), y = .9, label = df_stats_rf$NG) +
|
|
||||||
# annotate("text", x = unique(df2_rf$conc_num_factor), y = .8, label = df_stats_rf$DB) +
|
|
||||||
# annotate("text", x = unique(df2_rf$conc_num_factor), y = .7, label = df_stats_rf$SM) +
|
|
||||||
# theme_Publication()
|
|
||||||
|
|
||||||
# # Create AUC statistics scatter plot
|
|
||||||
# plot_auc_stats <- ggplot(df2_rf, aes(conc_num_factor, AUC)) +
|
|
||||||
# geom_point(position = "jitter", size = 1) +
|
|
||||||
# stat_summary(
|
|
||||||
# fun = mean,
|
|
||||||
# fun.min = ~ mean(.) - sd(.),
|
|
||||||
# fun.max = ~ mean(.) + sd(.),
|
|
||||||
# geom = "errorbar", color = "red"
|
|
||||||
# ) +
|
|
||||||
# stat_summary(fun = mean, geom = "point", color = "red") +
|
|
||||||
# scale_x_continuous(name = unique(df$Drug[1]),
|
|
||||||
# breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
|
|
||||||
# ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) +
|
|
||||||
# coord_cartesian(ylim = c(0, 12500)) +
|
|
||||||
# annotate("text", x = -0.25, y = 11000, label = "NG") +
|
|
||||||
# annotate("text", x = -0.25, y = 10000, label = "DB") +
|
|
||||||
# annotate("text", x = -0.25, y = 9000, label = "SM") +
|
|
||||||
# annotate("text", x = unique(df2_rf$conc_num_factor), y = 11000, label = df_stats_rf$NG) +
|
|
||||||
# annotate("text", x = unique(df2_rf$conc_num_factor), y = 10000, label = df_stats_rf$DB) +
|
|
||||||
# annotate("text", x = unique(df2_rf$conc_num_factor), y = 9000, label = df_stats_rf$SM) +
|
|
||||||
# theme_Publication()
|
|
||||||
|
|
||||||
# # Arrange and plot scatter plots
|
|
||||||
# grid.arrange(plot_l_stats, plot_k_stats, plot_r_stats, plot_auc_stats, ncol = 2, nrow = 2)
|
|
||||||
|
|
||||||
# # Create box plots for each statistic
|
|
||||||
# plot_l_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), L)) +
|
|
||||||
# geom_boxplot() +
|
|
||||||
# scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
|
|
||||||
# ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) +
|
|
||||||
# coord_cartesian(ylim = c(0, 160)) +
|
|
||||||
# theme_Publication()
|
|
||||||
|
|
||||||
# plot_k_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), K)) +
|
|
||||||
# geom_boxplot() +
|
|
||||||
# scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
|
|
||||||
# ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) +
|
|
||||||
# coord_cartesian(ylim = c(0, 130)) +
|
|
||||||
# theme_Publication()
|
|
||||||
|
|
||||||
# plot_r_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), r)) +
|
|
||||||
# geom_boxplot() +
|
|
||||||
# scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
|
|
||||||
# ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) +
|
|
||||||
# coord_cartesian(ylim = c(0, 1)) +
|
|
||||||
# theme_Publication()
|
|
||||||
|
|
||||||
# plot_auc_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), AUC)) +
|
|
||||||
# geom_boxplot() +
|
|
||||||
# scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
|
|
||||||
# ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) +
|
|
||||||
# coord_cartesian(ylim = c(0, 12500)) +
|
|
||||||
# theme_Publication()
|
|
||||||
|
|
||||||
# # Arrange and plot box plots
|
|
||||||
# grid.arrange(plot_l_stats_box, plot_k_stats_box, plot_r_stats_box, plot_auc_stats_box, ncol = 2, nrow = 2)
|
|
||||||
|
|
||||||
# # Loop to arrange and print combined plots
|
|
||||||
# plot_indices <- seq(1, (num_genes - 1), by = 3)
|
|
||||||
# for (m in seq_along(plot_indices)) {
|
|
||||||
# grid.arrange(
|
|
||||||
# p_l[[plot_indices[m]]], p_k[[plot_indices[m]]], p_r[[plot_indices[m]]], p_auc[[plot_indices[m]]],
|
|
||||||
# p_l[[plot_indices[m] + 1]], p_k[[plot_indices[m] + 1]], p_r[[plot_indices[m] + 1]], p_auc[[plot_indices[m] + 1]],
|
|
||||||
# p_l[[plot_indices[m] + 2]], p_k[[plot_indices[m] + 2]], p_r[[plot_indices[m] + 2]], p_auc[[plot_indices[m] + 2]],
|
|
||||||
# ncol = 4, nrow = 3
|
|
||||||
# )
|
|
||||||
# }
|
|
||||||
|
|
||||||
# # Handle leftover plots if num_genes is not a multiple of 3
|
|
||||||
# remaining_plots <- num_genes - max(plot_indices + 2)
|
|
||||||
# if (remaining_plots > 0) {
|
|
||||||
# plot_grid_list <- lapply(seq_len(remaining_plots), function(i) {
|
|
||||||
# list(p_l[[plot_indices[length(plot_indices)] + i]],
|
|
||||||
# p_k[[plot_indices[length(plot_indices)] + i]],
|
|
||||||
# p_r[[plot_indices[length(plot_indices)] + i]],
|
|
||||||
# p_auc[[plot_indices[length(plot_indices)] + i]])
|
|
||||||
# })
|
|
||||||
# do.call(grid.arrange, c(plot_grid_list, list(ncol = 4, nrow = 3)))
|
|
||||||
# }
|
|
||||||
|
|
||||||
# dev.off()
|
|
||||||
|
|
||||||
# # Additional PDF output for RF interaction plots
|
|
||||||
# # Generate PDF for RF interaction plots
|
|
||||||
# pdf(file.path(output_dir, "RF_InteractionPlots.pdf"), width = 16, height = 16, onefile = TRUE)
|
|
||||||
|
|
||||||
# # Summarize stats for RF data
|
|
||||||
# df_stats_rf <- df2_rf %>%
|
|
||||||
# group_by(conc_num, conc_num_factor) %>%
|
|
||||||
# summarise(
|
|
||||||
# mean_L = mean(L, na.rm = TRUE),
|
|
||||||
# median_L = median(L, na.rm = TRUE),
|
|
||||||
# max_L = max(L, na.rm = TRUE),
|
|
||||||
# min_L = min(L, na.rm = TRUE),
|
|
||||||
# sd_L = sd(L, na.rm = TRUE),
|
|
||||||
# mean_K = mean(K, na.rm = TRUE),
|
|
||||||
# median_K = median(K, na.rm = TRUE),
|
|
||||||
# max_K = max(K, na.rm = TRUE),
|
|
||||||
# min_K = min(K, na.rm = TRUE),
|
|
||||||
# sd_K = sd(K, na.rm = TRUE),
|
|
||||||
# mean_r = mean(r, na.rm = TRUE),
|
|
||||||
# median_r = median(r, na.rm = TRUE),
|
|
||||||
# max_r = max(r, na.rm = TRUE),
|
|
||||||
# min_r = min(r, na.rm = TRUE),
|
|
||||||
# sd_r = sd(r, na.rm = TRUE),
|
|
||||||
# mean_AUC = mean(AUC, na.rm = TRUE),
|
|
||||||
# median_AUC = median(AUC, na.rm = TRUE),
|
|
||||||
# max_AUC = max(AUC, na.rm = TRUE),
|
|
||||||
# min_AUC = min(AUC, na.rm = TRUE),
|
|
||||||
# sd_AUC = sd(AUC, na.rm = TRUE),
|
|
||||||
# NG = sum(NG, na.rm = TRUE),
|
|
||||||
# DB = sum(DB, na.rm = TRUE),
|
|
||||||
# SM = sum(SM, na.rm = TRUE)
|
|
||||||
# )
|
|
||||||
|
|
||||||
# # Create L statistics scatter plot for RF data
|
|
||||||
# plot_rf_l_stats <- ggplot(df2_rf, aes(conc_num_factor, L)) +
|
|
||||||
# geom_point(position = "jitter", size = 1) +
|
|
||||||
# stat_summary(
|
|
||||||
# fun = mean,
|
|
||||||
# fun.min = ~ mean(.) - sd(.),
|
|
||||||
# fun.max = ~ mean(.) + sd(.),
|
|
||||||
# geom = "errorbar", color = "red"
|
|
||||||
# ) +
|
|
||||||
# stat_summary(fun = mean, geom = "point", color = "red") +
|
|
||||||
# scale_x_continuous(name = unique(df$Drug[1]),
|
|
||||||
# breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
|
|
||||||
# ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) +
|
|
||||||
# coord_cartesian(ylim = c(0, 130)) +
|
|
||||||
# annotate("text", x = -0.25, y = 10, label = "NG") +
|
|
||||||
# annotate("text", x = -0.25, y = 5, label = "DB") +
|
|
||||||
# annotate("text", x = -0.25, y = 0, label = "SM") +
|
|
||||||
# annotate("text", x = unique(df2_rf$conc_num_factor), y = 10, label = df_stats_rf$NG) +
|
|
||||||
# annotate("text", x = unique(df2_rf$conc_num_factor), y = 5, label = df_stats_rf$DB) +
|
|
||||||
# annotate("text", x = unique(df2_rf$conc_num_factor), y = 0, label = df_stats_rf$SM) +
|
|
||||||
# theme_Publication()
|
|
||||||
|
|
||||||
# # Create K statistics scatter plot for RF data
|
|
||||||
# plot_rf_k_stats <- ggplot(df2_rf, aes(conc_num_factor, K)) +
|
|
||||||
# geom_point(position = "jitter", size = 1) +
|
|
||||||
# stat_summary(
|
|
||||||
# fun = mean,
|
|
||||||
# fun.min = ~ mean(.) - sd(.),
|
|
||||||
# fun.max = ~ mean(.) + sd(.),
|
|
||||||
# geom = "errorbar", color = "red"
|
|
||||||
# ) +
|
|
||||||
# stat_summary(fun = mean, geom = "point", color = "red") +
|
|
||||||
# scale_x_continuous(name = unique(df$Drug[1]),
|
|
||||||
# breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
|
|
||||||
# ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) +
|
|
||||||
# coord_cartesian(ylim = c(-20, 160)) +
|
|
||||||
# annotate("text", x = -0.25, y = -5, label = "NG") +
|
|
||||||
# annotate("text", x = -0.25, y = -12.5, label = "DB") +
|
|
||||||
# annotate("text", x = -0.25, y = -20, label = "SM") +
|
|
||||||
# annotate("text", x = unique(df2_rf$conc_num_factor), y = -5, label = df_stats_rf$NG) +
|
|
||||||
# annotate("text", x = unique(df2_rf$conc_num_factor), y = -12.5, label = df_stats_rf$DB) +
|
|
||||||
# annotate("text", x = unique(df2_rf$conc_num_factor), y = -20, label = df_stats_rf$SM) +
|
|
||||||
# theme_Publication()
|
|
||||||
|
|
||||||
# # Create r statistics scatter plot for RF data
|
|
||||||
# plot_rf_r_stats <- ggplot(df2_rf, aes(conc_num_factor, r)) +
|
|
||||||
# geom_point(position = "jitter", size = 1) +
|
|
||||||
# stat_summary(
|
|
||||||
# fun = mean,
|
|
||||||
# fun.min = ~ mean(.) - sd(.),
|
|
||||||
# fun.max = ~ mean(.) + sd(.),
|
|
||||||
# geom = "errorbar", color = "red"
|
|
||||||
# ) +
|
|
||||||
# stat_summary(fun = mean, geom = "point", color = "red") +
|
|
||||||
# scale_x_continuous(name = unique(df$Drug[1]),
|
|
||||||
# breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
|
|
||||||
# ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) +
|
|
||||||
# coord_cartesian(ylim = c(0, 1)) +
|
|
||||||
# annotate("text", x = -0.25, y = .9, label = "NG") +
|
|
||||||
# annotate("text", x = -0.25, y = .8, label = "DB") +
|
|
||||||
# annotate("text", x = -0.25, y = .7, label = "SM") +
|
|
||||||
# annotate("text", x = unique(df2_rf$conc_num_factor), y = .9, label = df_stats_rf$NG) +
|
|
||||||
# annotate("text", x = unique(df2_rf$conc_num_factor), y = .8, label = df_stats_rf$DB) +
|
|
||||||
# annotate("text", x = unique(df2_rf$conc_num_factor), y = .7, label = df_stats_rf$SM) +
|
|
||||||
# theme_Publication()
|
|
||||||
|
|
||||||
# # Create AUC statistics scatter plot for RF data
|
|
||||||
# plot_rf_auc_stats <- ggplot(df2_rf, aes(conc_num_factor, AUC)) +
|
|
||||||
# geom_point(position = "jitter", size = 1) +
|
|
||||||
# stat_summary(
|
|
||||||
# fun = mean,
|
|
||||||
# fun.min = ~ mean(.) - sd(.),
|
|
||||||
# fun.max = ~ mean(.) + sd(.),
|
|
||||||
# geom = "errorbar", color = "red"
|
|
||||||
# ) +
|
|
||||||
# stat_summary(fun = mean, geom = "point", color = "red") +
|
|
||||||
# scale_x_continuous(name = unique(df$Drug[1]),
|
|
||||||
# breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
|
|
||||||
# ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) +
|
|
||||||
# coord_cartesian(ylim = c(0, 12500)) +
|
|
||||||
# annotate("text", x = -0.25, y = 11000, label = "NG") +
|
|
||||||
# annotate("text", x = -0.25, y = 10000, label = "DB") +
|
|
||||||
# annotate("text", x = -0.25, y = 9000, label = "SM") +
|
|
||||||
# annotate("text", x = unique(df2_rf$conc_num_factor), y = 11000, label = df_stats_rf$NG) +
|
|
||||||
# annotate("text", x = unique(df2_rf$conc_num_factor), y = 10000, label = df_stats_rf$DB) +
|
|
||||||
# annotate("text", x = unique(df2_rf$conc_num_factor), y = 9000, label = df_stats_rf$SM) +
|
|
||||||
# theme_Publication()
|
|
||||||
|
|
||||||
# # Arrange and plot RF scatter plots
|
|
||||||
# grid.arrange(plot_rf_l_stats, plot_rf_k_stats, plot_rf_r_stats, plot_rf_auc_stats, ncol = 2, nrow = 2)
|
|
||||||
|
|
||||||
# # Create box plots for each RF statistic
|
|
||||||
# plot_rf_l_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), L)) +
|
|
||||||
# geom_boxplot() +
|
|
||||||
# scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
|
|
||||||
# ggtitle(paste(s, "Scatter RF for L with SD", sep = " ")) +
|
|
||||||
# coord_cartesian(ylim = c(0, 130)) +
|
|
||||||
# theme_Publication()
|
|
||||||
|
|
||||||
# plot_rf_k_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), K)) +
|
|
||||||
# geom_boxplot() +
|
|
||||||
# scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
|
|
||||||
# ggtitle(paste(s, "Scatter RF for K with SD", sep = " ")) +
|
|
||||||
# coord_cartesian(ylim = c(0, 160)) +
|
|
||||||
# theme_Publication()
|
|
||||||
|
|
||||||
# plot_rf_r_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), r)) +
|
|
||||||
# geom_boxplot() +
|
|
||||||
# scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
|
|
||||||
# ggtitle(paste(s, "Scatter RF for r with SD", sep = " ")) +
|
|
||||||
# coord_cartesian(ylim = c(0, 1)) +
|
|
||||||
# theme_Publication()
|
|
||||||
|
|
||||||
# plot_rf_auc_stats_box <- ggplot(df2_rf, aes(as.factor(conc_num_factor), AUC)) +
|
|
||||||
# geom_boxplot() +
|
|
||||||
# scale_x_discrete(name = unique(df$Drug[1]), breaks = unique(df2_rf$conc_num_factor), labels = as.character(unique(df2_rf$conc_num))) +
|
|
||||||
# ggtitle(paste(s, "Scatter RF for AUC with SD", sep = " ")) +
|
|
||||||
# coord_cartesian(ylim = c(0, 12500)) +
|
|
||||||
# theme_Publication()
|
|
||||||
|
|
||||||
# # Arrange and plot RF box plots
|
|
||||||
# grid.arrange(plot_rf_l_stats_box, plot_rf_k_stats_box, plot_rf_r_stats_box, plot_rf_auc_stats_box, ncol = 2, nrow = 2)
|
|
||||||
|
|
||||||
# # Loop to arrange and print combined RF plots
|
|
||||||
# plot_indices_rf <- seq(1, (num_genes_RF - 1), by = 3)
|
|
||||||
# for (m in seq_along(plot_indices_rf)) {
|
|
||||||
# grid.arrange(
|
|
||||||
# p_rf_l[[plot_indices_rf[m]]], p_rf_k[[plot_indices_rf[m]]], p_rf_r[[plot_indices_rf[m]]], p_rf_auc[[plot_indices_rf[m]]],
|
|
||||||
# p_rf_l[[plot_indices_rf[m] + 1]], p_rf_k[[plot_indices_rf[m] + 1]],
|
|
||||||
# p_rf_r[[plot_indices_rf[m] + 1]], p_rf_auc[[plot_indices_rf[m] + 1]],
|
|
||||||
# p_rf_l[[plot_indices_rf[m] + 2]], p_rf_k[[plot_indices_rf[m] + 2]],
|
|
||||||
# p_rf_r[[plot_indices_rf[m] + 2]], p_rf_auc[[plot_indices_rf[m] + 2]],
|
|
||||||
# ncol = 4, nrow = 3
|
|
||||||
# )
|
|
||||||
# }
|
|
||||||
|
|
||||||
# # Handle leftover RF plots if num_genes_RF is not a multiple of 3
|
|
||||||
# remaining_rf_plots <- num_genes_RF - max(plot_indices_rf + 2)
|
|
||||||
# if (remaining_rf_plots > 0) {
|
|
||||||
# plot_grid_rf_list <- lapply(seq_len(remaining_rf_plots), function(i) {
|
|
||||||
# list(p_rf_l[[plot_indices_rf[length(plot_indices_rf)] + i]], p_rf_k[[plot_indices_rf[length(plot_indices_rf)] + i]],
|
|
||||||
# p_rf_r[[plot_indices_rf[length(plot_indices_rf)] + i]], p_rf_auc[[plot_indices_rf[length(plot_indices_rf)] + i]])
|
|
||||||
# })
|
|
||||||
# do.call(grid.arrange, c(plot_grid_rf_list, list(ncol = 4, nrow = 3)))
|
|
||||||
# }
|
|
||||||
# dev.off()
|
|
||||||
# }
|
|
||||||
|
|
||||||
# # Fit pairwise linear models between the CPP interaction z-scores (Z_lm_L, Z_lm_K, Z_lm_r, Z_lm_AUC) in df_na_rm; R-squared values are read from their summaries
|
|
||||||
# lm_list <- list(
|
|
||||||
# lm(Z_lm_K ~ Z_lm_L, data = df_na_rm),
|
|
||||||
# lm(Z_lm_r ~ Z_lm_L, data = df_na_rm),
|
|
||||||
# lm(Z_lm_AUC ~ Z_lm_L, data = df_na_rm),
|
|
||||||
# lm(Z_lm_r ~ Z_lm_K, data = df_na_rm),
|
|
||||||
# lm(Z_lm_AUC ~ Z_lm_K, data = df_na_rm),
|
|
||||||
# lm(Z_lm_AUC ~ Z_lm_r, data = df_na_rm)
|
|
||||||
# )
|
|
||||||
|
|
||||||
# lm_summaries <- lapply(lm_list, summary)
|
|
||||||
|
|
||||||
# # Create PDF for correlation plots of CPPs
|
|
||||||
# pdf(file.path(output_dir, "Correlation_CPPs.pdf"), width = 10, height = 7, onefile = TRUE)
|
|
||||||
|
|
||||||
# # Generate correlation plots for each combination
|
|
||||||
# plot_list <- list(
|
|
||||||
# ggplot(df_na_rm, aes(Z_lm_L, Z_lm_K)) +
|
|
||||||
# geom_point(shape = 3, color = "gray70") +
|
|
||||||
# geom_smooth(method = "lm", color = "tomato3") +
|
|
||||||
# ggtitle("Interaction L vs. Interaction K") +
|
|
||||||
# xlab("z-score L") + ylab("z-score K") +
|
|
||||||
# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[1]]$r.squared, 3))) +
|
|
||||||
# theme_Publication_legend_right() +
|
|
||||||
# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
|
|
||||||
# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
|
|
||||||
# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)),
|
|
||||||
|
|
||||||
# ggplot(df_na_rm, aes(Z_lm_L, Z_lm_r)) +
|
|
||||||
# geom_point(shape = 3, color = "gray70") +
|
|
||||||
# geom_smooth(method = "lm", color = "tomato3") +
|
|
||||||
# ggtitle("Interaction L vs. Interaction r") +
|
|
||||||
# xlab("z-score L") + ylab("z-score r") +
|
|
||||||
# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[2]]$r.squared, 3))) +
|
|
||||||
# theme_Publication_legend_right() +
|
|
||||||
# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
|
|
||||||
# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
|
|
||||||
# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)),
|
|
||||||
|
|
||||||
# ggplot(df_na_rm, aes(Z_lm_L, Z_lm_AUC)) +
|
|
||||||
# geom_point(shape = 3, color = "gray70") +
|
|
||||||
# geom_smooth(method = "lm", color = "tomato3") +
|
|
||||||
# ggtitle("Interaction L vs. Interaction AUC") +
|
|
||||||
# xlab("z-score L") + ylab("z-score AUC") +
|
|
||||||
# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[3]]$r.squared, 3))) +
|
|
||||||
# theme_Publication_legend_right() +
|
|
||||||
# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
|
|
||||||
# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
|
|
||||||
# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)),
|
|
||||||
|
|
||||||
# ggplot(df_na_rm, aes(Z_lm_K, Z_lm_r)) +
|
|
||||||
# geom_point(shape = 3, color = "gray70") +
|
|
||||||
# geom_smooth(method = "lm", color = "tomato3") +
|
|
||||||
# ggtitle("Interaction K vs. Interaction r") +
|
|
||||||
# xlab("z-score K") + ylab("z-score r") +
|
|
||||||
# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[4]]$r.squared, 3))) +
|
|
||||||
# theme_Publication_legend_right() +
|
|
||||||
# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
|
|
||||||
# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
|
|
||||||
# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)),
|
|
||||||
|
|
||||||
# ggplot(df_na_rm, aes(Z_lm_K, Z_lm_AUC)) +
|
|
||||||
# geom_point(shape = 3, color = "gray70") +
|
|
||||||
# geom_smooth(method = "lm", color = "tomato3") +
|
|
||||||
# ggtitle("Interaction K vs. Interaction AUC") +
|
|
||||||
# xlab("z-score K") + ylab("z-score AUC") +
|
|
||||||
# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[5]]$r.squared, 3))) +
|
|
||||||
# theme_Publication_legend_right() +
|
|
||||||
# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
|
|
||||||
# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
|
|
||||||
# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)),
|
|
||||||
|
|
||||||
# ggplot(df_na_rm, aes(Z_lm_r, Z_lm_AUC)) +
|
|
||||||
# geom_point(shape = 3, color = "gray70") +
|
|
||||||
# geom_smooth(method = "lm", color = "tomato3") +
|
|
||||||
# ggtitle("Interaction r vs. Interaction AUC") +
|
|
||||||
# xlab("z-score r") + ylab("z-score AUC") +
|
|
||||||
# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[6]]$r.squared, 3))) +
|
|
||||||
# theme_Publication_legend_right() +
|
|
||||||
# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
|
|
||||||
# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
|
|
||||||
# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18))
|
|
||||||
# )
|
|
||||||
|
|
||||||
# # Print all correlation plots to the PDF
|
|
||||||
# lapply(plot_list, print)
|
|
||||||
|
|
||||||
# # Create additional plots with InteractionScores_RF highlighted in cyan
|
|
||||||
# interaction_scores_rf_filtered <- interaction_scores_rf[!is.na(interaction_scores_rf$Z_lm_L), ]
|
|
||||||
|
|
||||||
# highlighted_plot_list <- list(
|
|
||||||
# ggplot(df_na_rm, aes(Z_lm_L, Z_lm_K)) +
|
|
||||||
# geom_point(shape = 3, color = "gray70") +
|
|
||||||
# geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_L, Z_lm_K), color = "cyan") +
|
|
||||||
# ggtitle("Interaction L vs. Interaction K") +
|
|
||||||
# xlab("z-score L") + ylab("z-score K") +
|
|
||||||
# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[1]]$r.squared, 3))) +
|
|
||||||
# theme_Publication_legend_right() +
|
|
||||||
# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
|
|
||||||
# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
|
|
||||||
# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)),
|
|
||||||
|
|
||||||
# ggplot(df_na_rm, aes(Z_lm_L, Z_lm_r)) +
|
|
||||||
# geom_point(shape = 3, color = "gray70") +
|
|
||||||
# geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_L, Z_lm_r), color = "cyan") +
|
|
||||||
# ggtitle("Interaction L vs. Interaction r") +
|
|
||||||
# xlab("z-score L") + ylab("z-score r") +
|
|
||||||
# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[2]]$r.squared, 3))) +
|
|
||||||
# theme_Publication_legend_right() +
|
|
||||||
# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
|
|
||||||
# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
|
|
||||||
# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)),
|
|
||||||
|
|
||||||
# ggplot(df_na_rm, aes(Z_lm_L, Z_lm_AUC)) +
|
|
||||||
# geom_point(shape = 3, color = "gray70") +
|
|
||||||
# geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_L, Z_lm_AUC), color = "cyan") +
|
|
||||||
# ggtitle("Interaction L vs. Interaction AUC") +
|
|
||||||
# xlab("z-score L") + ylab("z-score AUC") +
|
|
||||||
# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[3]]$r.squared, 3))) +
|
|
||||||
# theme_Publication_legend_right() +
|
|
||||||
# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
|
|
||||||
# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
|
|
||||||
# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)),
|
|
||||||
|
|
||||||
# ggplot(df_na_rm, aes(Z_lm_K, Z_lm_r)) +
|
|
||||||
# geom_point(shape = 3, color = "gray70") +
|
|
||||||
# geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_K, Z_lm_r), color = "cyan") +
|
|
||||||
# ggtitle("Interaction K vs. Interaction r") +
|
|
||||||
# xlab("z-score K") + ylab("z-score r") +
|
|
||||||
# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[4]]$r.squared, 3))) +
|
|
||||||
# theme_Publication_legend_right() +
|
|
||||||
# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
|
|
||||||
# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
|
|
||||||
# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)),
|
|
||||||
|
|
||||||
# ggplot(df_na_rm, aes(Z_lm_K, Z_lm_AUC)) +
|
|
||||||
# geom_point(shape = 3, color = "gray70") +
|
|
||||||
# geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_K, Z_lm_AUC), color = "cyan") +
|
|
||||||
# ggtitle("Interaction K vs. Interaction AUC") +
|
|
||||||
# xlab("z-score K") + ylab("z-score AUC") +
|
|
||||||
# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[5]]$r.squared, 3))) +
|
|
||||||
# theme_Publication_legend_right() +
|
|
||||||
# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
|
|
||||||
# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
|
|
||||||
# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18)),
|
|
||||||
|
|
||||||
# ggplot(df_na_rm, aes(Z_lm_r, Z_lm_AUC)) +
|
|
||||||
# geom_point(shape = 3, color = "gray70") +
|
|
||||||
# geom_point(data = interaction_scores_rf_filtered, aes(Z_lm_r, Z_lm_AUC), color = "cyan") +
|
|
||||||
# ggtitle("Interaction r vs. Interaction AUC") +
|
|
||||||
# xlab("z-score r") + ylab("z-score AUC") +
|
|
||||||
# annotate("text", x = 0, y = 0, label = paste("R-squared = ", round(lm_summaries[[6]]$r.squared, 3))) +
|
|
||||||
# theme_Publication_legend_right() +
|
|
||||||
# theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
|
|
||||||
# axis.text.x = element_text(size = 16), axis.title.x = element_text(size = 18),
|
|
||||||
# axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 18))
|
|
||||||
# )
|
|
||||||
|
|
||||||
# # Print all highlighted plots to the PDF
|
|
||||||
# lapply(highlighted_plot_list, print)
|
|
||||||
|
|
||||||
# dev.off()
|
|
||||||
|
|||||||
Reference in New Issue
Block a user