Before old plotting removal
This commit is contained in:
@@ -163,102 +163,6 @@ update_gene_names <- function(df, sgd_gene_list) {
|
||||
return(df)
|
||||
}
|
||||
|
||||
generate_plot <- function(df, x_var, y_var = NULL, plot_type, color_var = "conc_num", title, x_label = NULL, y_label = NULL) {
|
||||
# Start the ggplot with just the x_var and color_var
|
||||
plot <- ggplot(df, aes(x = !!sym(x_var), color = as.factor(!!sym(color_var)))) +
|
||||
ggtitle(title) +
|
||||
theme_publication()
|
||||
|
||||
# If y_var is not NULL, add it to the plot aesthetics
|
||||
if (!is.null(y_var)) {
|
||||
plot <- plot + aes(y = !!sym(y_var))
|
||||
}
|
||||
|
||||
# Handle different plot types
|
||||
plot <- switch(plot_type,
|
||||
"scatter" = plot + geom_point(shape = 3, size = 0.2, position = "jitter") +
|
||||
stat_summary(fun.data = mean_sdl, geom = "errorbar") +
|
||||
stat_summary(fun = mean, geom = "point", size = 0.6),
|
||||
"box" = plot + geom_boxplot(),
|
||||
"density" = plot + geom_density(),
|
||||
"bar" = plot + geom_bar(stat = ifelse(is.null(y_var), "count", "identity"))
|
||||
)
|
||||
|
||||
# Add optional labels if provided
|
||||
if (!is.null(x_label)) plot <- plot + xlab(x_label)
|
||||
if (!is.null(y_label)) plot <- plot + ylab(y_label)
|
||||
|
||||
return(plot)
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
generate_and_save_plots <- function(df, output_dir, prefix, variables, include_qc = FALSE) {
|
||||
plots <- list()
|
||||
|
||||
if (nrow(df) == 0) {
|
||||
message("The \"", deparse(substitute(df)), "\" dataframe is empty, skipping plots")
|
||||
return()
|
||||
}
|
||||
|
||||
message("Generating plots for \"", deparse(substitute(df)), "\" dataframe")
|
||||
|
||||
for (var in variables) {
|
||||
scatter_plot <-
|
||||
generate_plot(df, x_var = "scan", y_var = var, plot_type = "scatter",
|
||||
title = paste(prefix, "Scatter Plot for", var))
|
||||
boxplot <-
|
||||
generate_plot(df, x_var = "scan", y_var = var, plot_type = "box",
|
||||
title = paste(prefix, "Box Plot for", var))
|
||||
|
||||
plots[[paste0(var, "_scatter")]] <- scatter_plot
|
||||
plots[[paste0(var, "_box")]] <- boxplot
|
||||
}
|
||||
|
||||
if (include_qc) {
|
||||
plots[["Raw_L_vs_K"]] <-
|
||||
generate_plot(df, x_var = "L", y_var = "K", plot_type = "scatter",
|
||||
title = "Raw L vs K before QC")
|
||||
plots[["Delta_bg_Density"]] <-
|
||||
generate_plot(df, x_var = "delta_bg", plot_type = "density", color_var = "conc_num",
|
||||
title = "Density plot for Delta Background by Conc All Data")
|
||||
plots[["Delta_bg_Bar"]] <-
|
||||
generate_plot(df, x_var = "delta_bg", plot_type = "bar",
|
||||
title = "Bar plot for Delta Background by Conc All Data")
|
||||
}
|
||||
|
||||
save_plots(prefix, plots, output_dir)
|
||||
}
|
||||
|
||||
# Ensure all plots are saved and printed to PDF
|
||||
save_plots <- function(file_name, plot_list, output_dir) {
|
||||
# Save to PDF
|
||||
pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9)
|
||||
lapply(plot_list, function(plot) {
|
||||
print(plot)
|
||||
})
|
||||
dev.off()
|
||||
|
||||
# Save to HTML with horizontal legend orientation
|
||||
lapply(names(plot_list), function(plot_name) {
|
||||
message("Generating plot: ", plot_name)
|
||||
pgg <- tryCatch({
|
||||
suppressWarnings(ggplotly(plot_list[[plot_name]]) %>%
|
||||
layout(legend = list(orientation = "h")))
|
||||
}, error = function(e) {
|
||||
message("Error in plot: ", plot_name, "\n", e)
|
||||
return(NULL)
|
||||
})
|
||||
if (!is.null(pgg)) {
|
||||
saveWidget(pgg,
|
||||
file = file.path(output_dir,
|
||||
paste0(file_name, "_", plot_name, ".html")),
|
||||
selfcontained = TRUE)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
# Process strains (deletion and reference)
|
||||
process_strains <- function(df) {
|
||||
df_strains <- data.frame() # Initialize an empty dataframe to store results
|
||||
@@ -295,7 +199,7 @@ calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "c
|
||||
max = ~max(.x, na.rm = TRUE),
|
||||
min = ~min(.x, na.rm = TRUE),
|
||||
sd = ~sd(.x, na.rm = TRUE),
|
||||
se = ~sd(.x, na.rm = TRUE) / sqrt(n() - 1)
|
||||
se = ~sd(.x, na.rm = TRUE) / sqrt(n() - 1) # TODO why - 1?
|
||||
), .names = "{.col}_{.fn}"))
|
||||
|
||||
return(summary_stats)
|
||||
@@ -305,48 +209,70 @@ calculate_interaction_scores <- function(df_ref, df, max_conc, variables, group_
|
||||
|
||||
# Pull the background means
|
||||
print("Calculating background means")
|
||||
l_mean_bg <- df_ref %>% filter(conc_num_factor == 0) %>% pull(L_mean)
|
||||
k_mean_bg <- df_ref %>% filter(conc_num_factor == 0) %>% pull(K_mean)
|
||||
L_mean_bg <- df_ref %>% filter(conc_num_factor == 0) %>% pull(L_mean)
|
||||
K_mean_bg <- df_ref %>% filter(conc_num_factor == 0) %>% pull(K_mean)
|
||||
r_mean_bg <- df_ref %>% filter(conc_num_factor == 0) %>% pull(r_mean)
|
||||
auc_mean_bg <- df_ref %>% filter(conc_num_factor == 0) %>% pull(AUC_mean)
|
||||
AUC_mean_bg <- df_ref %>% filter(conc_num_factor == 0) %>% pull(AUC_mean)
|
||||
|
||||
L_sd_bg <- df_ref %>% filter(conc_num_factor == 0) %>% pull(L_sd)
|
||||
K_sd_bg <- df_ref %>% filter(conc_num_factor == 0) %>% pull(K_sd)
|
||||
r_sd_bg <- df_ref %>% filter(conc_num_factor == 0) %>% pull(r_sd)
|
||||
AUC_sd_bg <- df_ref %>% filter(conc_num_factor == 0) %>% pull(AUC_sd)
|
||||
|
||||
# Calculate all necessary statistics and shifts in one step
|
||||
# Calculate summary statistics and shifts
|
||||
print("Calculating interaction scores part 1")
|
||||
interaction_scores_all <- df %>%
|
||||
group_by(across(all_of(group_vars)), conc_num, conc_num_factor) %>%
|
||||
summarise(
|
||||
N = n(),
|
||||
across(all_of(variables), list(mean = mean, sd = sd), na.rm = TRUE),
|
||||
NG = sum(NG, na.rm = TRUE),
|
||||
SM = sum(SM, na.rm = TRUE),
|
||||
raw_shift_l = mean(L, na.rm = TRUE) - l_mean_bg,
|
||||
raw_shift_k = mean(K, na.rm = TRUE) - k_mean_bg,
|
||||
raw_shift_r = mean(r, na.rm = TRUE) - r_mean_bg,
|
||||
raw_shift_auc = mean(AUC, na.rm = TRUE) - auc_mean_bg,
|
||||
z_shift_l = raw_shift_l / L_sd[1],
|
||||
z_shift_k = raw_shift_k / K_sd[1],
|
||||
z_shift_r = raw_shift_r / r_sd[1],
|
||||
z_shift_auc = raw_shift_auc / AUC_sd[1],
|
||||
exp_l = l_mean_bg + raw_shift_l,
|
||||
exp_k = k_mean_bg + raw_shift_k,
|
||||
exp_r = r_mean_bg + raw_shift_r,
|
||||
exp_auc = auc_mean_bg + raw_shift_auc,
|
||||
delta_l = mean(L, na.rm = TRUE) - exp_l,
|
||||
delta_k = mean(K, na.rm = TRUE) - exp_k,
|
||||
delta_r = mean(r, na.rm = TRUE) - exp_r,
|
||||
delta_auc = mean(AUC, na.rm = TRUE) - exp_auc,
|
||||
zscore_l = delta_l / L_sd,
|
||||
zscore_k = delta_k / K_sd,
|
||||
zscore_r = delta_r / r_sd,
|
||||
zscore_auc = delta_auc / AUC_sd,
|
||||
sum_z_score_l = sum(zscore_l, na.rm = TRUE),
|
||||
avg_zscore_l = sum_z_score_l / (length(variables) - sum(NG, na.rm = TRUE)),
|
||||
sum_z_score_k = sum(zscore_k, na.rm = TRUE),
|
||||
avg_zscore_k = sum_z_score_k / (length(variables) - sum(NG, na.rm = TRUE)),
|
||||
sum_z_score_r = sum(zscore_r, na.rm = TRUE),
|
||||
avg_zscore_r = sum_z_score_r / (length(variables) - sum(NG, na.rm = TRUE)),
|
||||
sum_z_score_auc = sum(zscore_auc, na.rm = TRUE),
|
||||
avg_zscore_auc = sum_z_score_auc / (length(variables) - sum(NG, na.rm = TRUE))
|
||||
L_mean = mean(L),
|
||||
L_median = median(L),
|
||||
L_sd = sd(L),
|
||||
L_se = L_sd / sqrt(N),
|
||||
K_mean = mean(K),
|
||||
K_median = median(K),
|
||||
K_sd = sd(K),
|
||||
K_se = K_sd / sqrt(N),
|
||||
r_mean = mean(r),
|
||||
r_median = median(r),
|
||||
r_sd = sd(r),
|
||||
r_se = r_sd / sqrt(N),
|
||||
AUC_mean = mean(AUC),
|
||||
AUC_median = median(AUC),
|
||||
AUC_sd = sd(AUC),
|
||||
AUC_se = AUC_sd / sqrt(N),
|
||||
NG = sum(NG),
|
||||
DB = sum(DB),
|
||||
SM = sum(SM),
|
||||
Raw_Shift_L = L_mean - L_mean_bg[[1]],
|
||||
Raw_Shift_K = K_mean - K_mean_bg[[1]],
|
||||
Raw_Shift_r = r_mean - r_mean_bg[[1]],
|
||||
Raw_Shift_AUC = AUC_mean - AUC_mean_bg[[1]],
|
||||
Z_Shift_L = Raw_Shift_L / L_sd[[0]],
|
||||
Z_Shift_K = Raw_Shift_K / K_sd[[0]],
|
||||
Z_Shift_r = Raw_Shift_r / r_sd[[0]],
|
||||
Z_Shift_AUC = Raw_Shift_AUC / AUC_sd[[0]],
|
||||
WT_l = df$L_mean,
|
||||
WT_K = df$K_mean,
|
||||
WT_r = df$r_mean,
|
||||
WT_AUC = df$AUC_mean,
|
||||
WT_sd_l = L_sd_bg[[1]],
|
||||
WT_sd_K = K_sd_bg[[1]],
|
||||
WT_sd_r = r_sd_bg[[1]],
|
||||
WT_sd_AUC = AUC_sd_bg[[1]],
|
||||
Exp_L = L_mean_bg[[1]] + Raw_Shift_L,
|
||||
Exp_K = K_mean_bg[[1]] + Raw_Shift_K,
|
||||
Exp_r = r_mean_bg[[1]] + Raw_Shift_r,
|
||||
Exp_AUC = AUC_mean_bg[[1]] + Raw_Shift_AUC,
|
||||
Delta_L = ifelse(NG == 1, mean(L) - WT_l, Delta_L),
|
||||
Delta_L = ifelse(SM == 1, mean(L) - WT_l, Delta_L),
|
||||
Delta_K = ifelse(NG == 1, mean(K) - WT_K, Delta_K),
|
||||
Delta_r = ifelse(NG == 1, mean(r) - WT_r, Delta_r),
|
||||
Delta_AUC = ifelse(NG == 1, mean(AUC) - WT_AUC, Delta_AUC),
|
||||
Zscore_L = Delta_L / WT_sd_l,
|
||||
Zscore_K = Delta_K / WT_sd_K,
|
||||
Zscore_r = Delta_r / WT_sd_r,
|
||||
Zscore_AUC = Delta_AUC / WT_sd_AUC,
|
||||
) %>%
|
||||
ungroup()
|
||||
|
||||
@@ -355,10 +281,10 @@ calculate_interaction_scores <- function(df_ref, df, max_conc, variables, group_
|
||||
interaction_scores <- interaction_scores_all %>%
|
||||
group_by(across(all_of(group_vars))) %>%
|
||||
summarise(
|
||||
lm_l = list(lm(delta_l ~ conc_num_factor)),
|
||||
lm_k = list(lm(delta_k ~ conc_num_factor)),
|
||||
lm_r = list(lm(delta_r ~ conc_num_factor)),
|
||||
lm_auc = list(lm(delta_auc ~ conc_num_factor)),
|
||||
lm_l = list(lm(Delta_L ~ conc_num_factor)),
|
||||
lm_k = list(lm(Delta_K ~ conc_num_factor)),
|
||||
lm_r = list(lm(Delta_r ~ conc_num_factor)),
|
||||
lm_auc = list(lm(Delta_AUC ~ conc_num_factor)),
|
||||
lm_Score_L = max_conc * coef(lm_l[[1]])[2] + coef(lm_l[[1]])[1],
|
||||
lm_Score_K = max_conc * coef(lm_k[[1]])[2] + coef(lm_k[[1]])[1],
|
||||
lm_Score_r = max_conc * coef(lm_r[[1]])[2] + coef(lm_r[[1]])[1],
|
||||
@@ -367,66 +293,192 @@ calculate_interaction_scores <- function(df_ref, df, max_conc, variables, group_
|
||||
R_Squared_K = summary(lm_k[[1]])$r.squared,
|
||||
R_Squared_r = summary(lm_r[[1]])$r.squared,
|
||||
R_Squared_AUC = summary(lm_auc[[1]])$r.squared,
|
||||
Sum_Z_Score_L = sum(zscore_l, na.rm = TRUE),
|
||||
Avg_Zscore_L = avg_zscore_l[1],
|
||||
Sum_Z_Score_K = sum(zscore_k, na.rm = TRUE),
|
||||
Avg_Zscore_K = avg_zscore_k[1],
|
||||
Sum_Z_Score_r = sum(zscore_r, na.rm = TRUE),
|
||||
Avg_Zscore_r = avg_zscore_r[1],
|
||||
Sum_Z_Score_AUC = sum(zscore_auc, na.rm = TRUE),
|
||||
Avg_Zscore_AUC = avg_zscore_auc[1],
|
||||
NG = sum(NG, na.rm = TRUE),
|
||||
DB = sum(DB, na.rm = TRUE),
|
||||
SM = sum(SM, na.rm = TRUE)
|
||||
Sum_Z_Score_L = sum(Zscore_L),
|
||||
Avg_Zscore_L = Sum_Z_Score_L / (length(variables) - sum(NG)),
|
||||
Sum_Z_Score_K = sum(Zscore_K),
|
||||
Avg_Zscore_K = Sum_Z_Score_K / (length(variables) - sum(NG)),
|
||||
Sum_Z_Score_r = sum(Zscore_r),
|
||||
Avg_Zscore_r = Sum_Z_Score_r / (length(variables) - sum(NG)),
|
||||
Sum_Z_Score_AUC = sum(Zscore_AUC),
|
||||
Avg_Zscore_AUC = Sum_Z_Score_AUC / (length(variables) - sum(NG)),
|
||||
NG = sum(NG),
|
||||
DB = sum(DB),
|
||||
SM = sum(SM)
|
||||
) %>%
|
||||
ungroup()
|
||||
|
||||
# Ensure interaction_scores has only the specified columns
|
||||
interaction_scores <- interaction_scores %>%
|
||||
select(Gene, raw_shift_l, z_shift_l, lm_Score_L, Z_lm_L = lm_Score_L, R_Squared_L, sum_z_score_l, avg_zscore_l,
|
||||
raw_shift_k, z_shift_k, lm_Score_K, Z_lm_K = lm_Score_K, R_Squared_K, sum_z_score_k, avg_zscore_k,
|
||||
raw_shift_r, z_shift_r, lm_Score_r, Z_lm_r = lm_Score_r, R_Squared_r, sum_z_score_r, avg_zscore_r,
|
||||
raw_shift_auc, z_shift_auc, lm_Score_AUC, Z_lm_AUC = lm_Score_AUC, R_Squared_AUC, sum_z_score_auc, avg_zscore_auc,
|
||||
NG, DB, SM)
|
||||
|
||||
return(list(zscores_calculations = interaction_scores_all, zscores_interactions = interaction_scores))
|
||||
}
|
||||
|
||||
generate_rf_plots <- function(df_calculations, df_interactions, output_dir, file_prefix = "RF") {
|
||||
variables <- c("Delta_L", "Delta_K", "Delta_r", "Delta_AUC")
|
||||
WT_sds <- list(WT_sd_l = 2, WT_sd_K = 2, WT_sd_r = 0.65, WT_sd_AUC = 6500)
|
||||
|
||||
# generate_rf_plots <- function(df_calculations, df_interactions, output_dir, file_prefix = "RF") {
|
||||
# variables <- c("Delta_L", "Delta_K", "Delta_r", "Delta_AUC")
|
||||
# WT_sds <- list(WT_sd_l = 2, WT_sd_K = 2, WT_sd_r = 0.65, WT_sd_AUC = 6500)
|
||||
|
||||
plot_list <- lapply(seq_along(variables), function(i) {
|
||||
var <- variables[i]
|
||||
WT_sd <- WT_sds[[i]]
|
||||
# plot_list <- lapply(seq_along(variables), function(i) {
|
||||
# var <- variables[i]
|
||||
# WT_sd <- WT_sds[[i]]
|
||||
|
||||
ggplot(df_calculations, aes(conc_num_factor, !!sym(var))) +
|
||||
geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
|
||||
coord_cartesian(ylim = c(-WT_sd, WT_sd)) +
|
||||
geom_errorbar(aes(ymin = 0 - (2 * WT_sd), ymax = 0 + (2 * WT_sd)), alpha = 0.3) +
|
||||
ggtitle(paste(df_calculations$OrfRep[1], df_calculations$Gene[1], sep = " ")) +
|
||||
annotate("text", x = 1, y = 0.9 * WT_sd, label = paste("ZShift =", round(df_interactions[[paste0("Z_Shift_", var)]], 2))) +
|
||||
annotate("text", x = 1, y = 0.7 * WT_sd, label = paste("lm Zscore =", round(df_interactions[[paste0("Z_lm_", var)]], 2))) +
|
||||
annotate("text", x = 1, y = -0.7 * WT_sd, label = paste("NG =", df_interactions$NG)) +
|
||||
annotate("text", x = 1, y = -0.9 * WT_sd, label = paste("DB =", df_interactions$DB)) +
|
||||
annotate("text", x = 1, y = -1.1 * WT_sd, label = paste("SM =", df_interactions$SM)) +
|
||||
scale_x_continuous(
|
||||
name = unique(df_calculations$Drug[1]),
|
||||
breaks = unique(df_calculations$conc_num_factor),
|
||||
labels = unique(as.character(df_calculations$conc_num))) +
|
||||
theme_publication()
|
||||
})
|
||||
# ggplot(df_calculations, aes(conc_num_factor, !!sym(var))) +
|
||||
# geom_point() + geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
|
||||
# coord_cartesian(ylim = c(-WT_sd, WT_sd)) +
|
||||
# geom_errorbar(aes(ymin = 0 - (2 * WT_sd), ymax = 0 + (2 * WT_sd)), alpha = 0.3) +
|
||||
# ggtitle(paste(df_calculations$OrfRep[1], df_calculations$Gene[1], sep = " ")) +
|
||||
# annotate("text", x = 1, y = 0.9 * WT_sd, label = paste("ZShift =", round(df_interactions[[paste0("Z_Shift_", var)]], 2))) +
|
||||
# annotate("text", x = 1, y = 0.7 * WT_sd, label = paste("lm Zscore =", round(df_interactions[[paste0("Z_lm_", var)]], 2))) +
|
||||
# annotate("text", x = 1, y = -0.7 * WT_sd, label = paste("NG =", df_interactions$NG)) +
|
||||
# annotate("text", x = 1, y = -0.9 * WT_sd, label = paste("DB =", df_interactions$DB)) +
|
||||
# annotate("text", x = 1, y = -1.1 * WT_sd, label = paste("SM =", df_interactions$SM)) +
|
||||
# scale_x_continuous(
|
||||
# name = unique(df_calculations$Drug[1]),
|
||||
# breaks = unique(df_calculations$conc_num_factor),
|
||||
# labels = unique(as.character(df_calculations$conc_num))) +
|
||||
# theme_publication()
|
||||
# })
|
||||
|
||||
save_plots(file_prefix, plot_list, output_dir)
|
||||
# save_plots(file_prefix, plot_list, output_dir)
|
||||
# }
|
||||
|
||||
# generate_summary_plots <- function(df, output_dir) {
|
||||
# variables <- c("L", "K", "r", "AUC")
|
||||
# plot_list <- lapply(variables, function(var) {
|
||||
# generate_plot(df, x_var = "conc_num_factor", y_var = var, plot_type = "scatter", title = paste("Summary Plot for", var))
|
||||
# })
|
||||
|
||||
# save_plots("Summary_Plots", plot_list, output_dir)
|
||||
# }
|
||||
|
||||
|
||||
# # Generate ranked plots for a specific metric
|
||||
# generate_ranked_plot <- function(df, rank_var, zscore_var, sd_threshold, title_prefix) {
|
||||
# ggplot(df, aes(x = {{rank_var}}, y = {{zscore_var}})) +
|
||||
# ggtitle(paste(title_prefix, "above", sd_threshold, "SD")) +
|
||||
# xlab("Rank") + ylab(paste("Avg Z score", title_prefix)) +
|
||||
# annotate("rect", xmin = -Inf, xmax = Inf, ymin = sd_threshold, ymax = Inf, fill = "#542788", alpha = 0.3) +
|
||||
# annotate("rect", xmin = -Inf, xmax = Inf, ymin = -sd_threshold, ymax = -Inf, fill = "orange", alpha = 0.3) +
|
||||
# geom_hline(yintercept = c(-sd_threshold, sd_threshold)) +
|
||||
# geom_point(size = 0.1, shape = 3) +
|
||||
# theme_publication()
|
||||
# }
|
||||
|
||||
# # Generate and save all ranked plots
|
||||
# generate_and_save_ranked_plots <- function(df, output_dir, prefix) {
|
||||
# rank_metrics <- list(
|
||||
# list("L_Rank", "Avg_Zscore_L", "L"),
|
||||
# list("K_Rank", "Avg_Zscore_K", "K"),
|
||||
# list("r_Rank", "Avg_Zscore_r", "r"),
|
||||
# list("AUC_Rank", "Avg_Zscore_AUC", "AUC"),
|
||||
# list("L_Rank_lm", "Z_lm_L", "L"),
|
||||
# list("K_Rank_lm", "Z_lm_K", "K"),
|
||||
# list("r_Rank_lm", "Z_lm_r", "r"),
|
||||
# list("AUC_Rank_lm", "Z_lm_AUC", "AUC")
|
||||
# )
|
||||
|
||||
# pdf(file.path(output_dir, paste0(prefix, ".pdf")), width = 18, height = 12, onefile = TRUE)
|
||||
|
||||
# for (sd_threshold in c(1, 2, 3)) {
|
||||
# for (metric in rank_metrics) {
|
||||
# plot <- generate_ranked_plot(df, sym(metric[[1]]), sym(metric[[2]]), sd_threshold, metric[[3]])
|
||||
# print(plot)
|
||||
# }
|
||||
# }
|
||||
|
||||
# dev.off()
|
||||
# }
|
||||
|
||||
|
||||
|
||||
|
||||
generate_plot <- function(df, x_var, y_var = NULL, plot_type, color_var = "conc_num",
|
||||
title, x_label = NULL, y_label = NULL, ylim_vals = NULL) {
|
||||
plot <- ggplot(df, aes_string(x = x_var, color = color_var))
|
||||
|
||||
if (!is.null(y_var)) {
|
||||
plot <- plot + aes_string(y = y_var)
|
||||
}
|
||||
|
||||
# Set up the plot based on the requested plot type
|
||||
plot <- switch(plot_type,
|
||||
"scatter" = plot + geom_point() + geom_smooth(method = "lm", se = FALSE),
|
||||
"box" = plot + geom_boxplot(),
|
||||
"density" = plot + geom_density(),
|
||||
"bar" = plot + geom_bar(stat = "identity"),
|
||||
plot # Default: return the plot as is
|
||||
)
|
||||
|
||||
if (!is.null(ylim_vals)) {
|
||||
plot <- plot + coord_cartesian(ylim = ylim_vals)
|
||||
}
|
||||
|
||||
# Add titles and labels if available
|
||||
plot <- plot + ggtitle(title) + theme_publication()
|
||||
if (!is.null(x_label)) plot <- plot + xlab(x_label)
|
||||
if (!is.null(y_label)) plot <- plot + ylab(y_label)
|
||||
|
||||
return(plot)
|
||||
}
|
||||
|
||||
generate_summary_plots <- function(df, output_dir) {
|
||||
variables <- c("L", "K", "r", "AUC")
|
||||
plot_list <- lapply(variables, function(var) {
|
||||
generate_plot(df, x_var = "conc_num_factor", y_var = var, plot_type = "scatter", title = paste("Summary Plot for", var))
|
||||
})
|
||||
|
||||
generate_and_save_plots <- function(df, output_dir, file_prefix, variables, plot_type = "scatter", include_qc = FALSE, ylim_vals = NULL) {
|
||||
plots <- list()
|
||||
|
||||
save_plots("Summary_Plots", plot_list, output_dir)
|
||||
if (nrow(df) == 0) {
|
||||
message("The \"", deparse(substitute(df)), "\" dataframe is empty, skipping plots")
|
||||
return()
|
||||
}
|
||||
|
||||
message("Generating plots for \"", deparse(substitute(df)), "\" dataframe")
|
||||
|
||||
# Create plots for the given variables
|
||||
for (var in variables) {
|
||||
plot <- generate_plot(
|
||||
df = df,
|
||||
x_var = "scan",
|
||||
y_var = var,
|
||||
plot_type = plot_type,
|
||||
title = paste(file_prefix, "Plot for", var),
|
||||
ylim_vals = ylim_vals
|
||||
)
|
||||
plots[[paste0(var, "_", plot_type)]] <- plot
|
||||
}
|
||||
|
||||
# Include additional QC plots if requested
|
||||
if (include_qc) {
|
||||
plots[["Raw_L_vs_K"]] <- generate_plot(df, "L", "K", "scatter", "Raw L vs K before QC")
|
||||
plots[["Delta_bg_Density"]] <- generate_plot(df, "delta_bg", NULL, "density", "Density plot for Delta Background")
|
||||
plots[["Delta_bg_Bar"]] <- generate_plot(df, "delta_bg", NULL, "bar", "Bar plot for Delta Background")
|
||||
}
|
||||
|
||||
save_plots(file_prefix, plots, output_dir)
|
||||
}
|
||||
|
||||
|
||||
# Ensure all plots are saved and printed to PDF
|
||||
save_plots <- function(file_name, plot_list, output_dir) {
|
||||
# Save to PDF
|
||||
pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9)
|
||||
lapply(plot_list, function(plot) {
|
||||
print(plot)
|
||||
})
|
||||
dev.off()
|
||||
|
||||
# Save to HTML with horizontal legend orientation
|
||||
lapply(names(plot_list), function(plot_name) {
|
||||
message("Generating plot: ", plot_name)
|
||||
pgg <- tryCatch({
|
||||
suppressWarnings(ggplotly(plot_list[[plot_name]]) %>%
|
||||
layout(legend = list(orientation = "h")))
|
||||
}, error = function(e) {
|
||||
message("Error in plot: ", plot_name, "\n", e)
|
||||
return(NULL)
|
||||
})
|
||||
if (!is.null(pgg)) {
|
||||
saveWidget(pgg,
|
||||
file = file.path(output_dir,
|
||||
paste0(file_name, "_", plot_name, ".html")),
|
||||
selfcontained = TRUE)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -472,42 +524,6 @@ adjust_missing_and_rank <- function(df) {
|
||||
return(df)
|
||||
}
|
||||
|
||||
# Generate ranked plots for a specific metric
|
||||
generate_ranked_plot <- function(df, rank_var, zscore_var, sd_threshold, title_prefix) {
|
||||
ggplot(df, aes(x = {{rank_var}}, y = {{zscore_var}})) +
|
||||
ggtitle(paste(title_prefix, "above", sd_threshold, "SD")) +
|
||||
xlab("Rank") + ylab(paste("Avg Z score", title_prefix)) +
|
||||
annotate("rect", xmin = -Inf, xmax = Inf, ymin = sd_threshold, ymax = Inf, fill = "#542788", alpha = 0.3) +
|
||||
annotate("rect", xmin = -Inf, xmax = Inf, ymin = -sd_threshold, ymax = -Inf, fill = "orange", alpha = 0.3) +
|
||||
geom_hline(yintercept = c(-sd_threshold, sd_threshold)) +
|
||||
geom_point(size = 0.1, shape = 3) +
|
||||
theme_publication()
|
||||
}
|
||||
|
||||
# Generate and save all ranked plots
|
||||
generate_and_save_ranked_plots <- function(df, output_dir, prefix) {
|
||||
rank_metrics <- list(
|
||||
list("L_Rank", "Avg_Zscore_L", "L"),
|
||||
list("K_Rank", "Avg_Zscore_K", "K"),
|
||||
list("r_Rank", "Avg_Zscore_r", "r"),
|
||||
list("AUC_Rank", "Avg_Zscore_AUC", "AUC"),
|
||||
list("L_Rank_lm", "Z_lm_L", "L"),
|
||||
list("K_Rank_lm", "Z_lm_K", "K"),
|
||||
list("r_Rank_lm", "Z_lm_r", "r"),
|
||||
list("AUC_Rank_lm", "Z_lm_AUC", "AUC")
|
||||
)
|
||||
|
||||
pdf(file.path(output_dir, paste0(prefix, ".pdf")), width = 18, height = 12, onefile = TRUE)
|
||||
|
||||
for (sd_threshold in c(1, 2, 3)) {
|
||||
for (metric in rank_metrics) {
|
||||
plot <- generate_ranked_plot(df, sym(metric[[1]]), sym(metric[[2]]), sd_threshold, metric[[3]])
|
||||
print(plot)
|
||||
}
|
||||
}
|
||||
|
||||
dev.off()
|
||||
}
|
||||
|
||||
# Function to create and save all ranked plots
|
||||
create_ranked_plots <- function(df, output_dir) {
|
||||
@@ -520,8 +536,45 @@ create_ranked_plots <- function(df, output_dir) {
|
||||
generate_and_save_ranked_plots(df_adjusted, output_dir, "RankPlots_lm_naRM")
|
||||
}
|
||||
|
||||
|
||||
generate_plots <- function(df, x_var, y_vars, plot_type, color_var = "conc_num", title_prefix = "",
|
||||
x_label = NULL, y_label = NULL, ylim_vals = NULL, output_dir, file_prefix = "plot") {
|
||||
plot_list <- list()
|
||||
|
||||
# Iterate over each y variable and generate the corresponding plot
|
||||
for (var in y_vars) {
|
||||
plot <- ggplot(df, aes(x = !!sym(x_var), y = !!sym(var), color = as.factor(!!sym(color_var)))) +
|
||||
ggtitle(paste(title_prefix, var)) +
|
||||
theme_publication()
|
||||
|
||||
if (plot_type == "scatter") {
|
||||
plot <- plot +
|
||||
geom_point() +
|
||||
geom_smooth(method = "lm", formula = y ~ x, se = FALSE)
|
||||
|
||||
if (!is.null(ylim_vals)) {
|
||||
plot <- plot + coord_cartesian(ylim = ylim_vals)
|
||||
}
|
||||
} else if (plot_type == "box") {
|
||||
plot <- plot + geom_boxplot()
|
||||
} else if (plot_type == "density") {
|
||||
plot <- plot + geom_density()
|
||||
} else if (plot_type == "bar") {
|
||||
plot <- plot + geom_bar(stat = "identity")
|
||||
}
|
||||
|
||||
if (!is.null(x_label)) plot <- plot + xlab(x_label)
|
||||
if (!is.null(y_label)) plot <- plot + ylab(y_label)
|
||||
|
||||
plot_list[[var]] <- plot
|
||||
}
|
||||
|
||||
# Save plots to PDF and HTML
|
||||
save_plots(file_prefix, plot_list, output_dir)
|
||||
}
|
||||
|
||||
|
||||
main <- function() {
|
||||
# Applying to all experiments
|
||||
lapply(names(args$experiments), function(exp_name) {
|
||||
exp <- args$experiments[[exp_name]]
|
||||
exp_path <- exp$path
|
||||
@@ -537,34 +590,28 @@ main <- function() {
|
||||
|
||||
max_conc <- max(df$conc_num_factor)
|
||||
|
||||
# QC
|
||||
# Filter the df above sd tolerance for raw L vs K plot
|
||||
# QC steps and filtering
|
||||
df_above_tolerance <- df %>% filter(DB == 1)
|
||||
|
||||
# Set vars above the delta background tolerance to NA
|
||||
df_na <- df %>% mutate(across(c(L, r, AUC, K), ~ ifelse(DB == 1, NA, .x)))
|
||||
|
||||
# Remove rows where L is 0
|
||||
df_no_zeros <- df_na %>% filter(L > 0)
|
||||
|
||||
# Flag and remove non-finite data, printing affected rows
|
||||
|
||||
df_na_filtered <- df_na %>%
|
||||
filter(if_any(c(L), ~ !is.finite(.))) %>% # Add L, r, AUC, K as needed for debugging
|
||||
filter(if_any(c(L), ~ !is.finite(.))) %>%
|
||||
{
|
||||
if (nrow(.) > 0) {
|
||||
message("Removing non-finite rows:\n")
|
||||
print(head(., n = 10))
|
||||
}
|
||||
df_na %>% filter(if_all(c(L), is.finite)) # Add L, r, AUC, K as needed for debugging
|
||||
df_na %>% filter(if_all(c(L), is.finite))
|
||||
}
|
||||
|
||||
# Generate QC PDFs and HTMLs
|
||||
# message("Generating QC plots")
|
||||
# variables <- c("L", "K", "r", "AUC", "delta_bg")
|
||||
# generate_and_save_plots(df, out_dir_qc, "Before_QC", variables, include_qc = TRUE)
|
||||
# generate_and_save_plots(df_above_tolerance, out_dir_qc, "Raw_L_vs_K_above_delta_bg_threshold", variables, include_qc = TRUE)
|
||||
# generate_and_save_plots(df_na_filtered, out_dir_qc, "After_QC", variables)
|
||||
# generate_and_save_plots(df_no_zeros, out_dir_qc, "No_Zeros", variables)
|
||||
|
||||
# Generate and save QC plots using the new generalized function
|
||||
message("Generating QC plots")
|
||||
variables <- c("L", "K", "r", "AUC", "delta_bg")
|
||||
generate_and_save_plots(df, out_dir_qc, "Before_QC", variables, include_qc = TRUE)
|
||||
generate_and_save_plots(df_above_tolerance, out_dir_qc, "Raw_L_vs_K_above_delta_bg_threshold", variables, include_qc = TRUE)
|
||||
generate_and_save_plots(df_na_filtered, out_dir_qc, "After_QC", variables)
|
||||
generate_and_save_plots(df_no_zeros, out_dir_qc, "No_Zeros", variables)
|
||||
|
||||
rm(df, df_above_tolerance, df_no_zeros)
|
||||
|
||||
@@ -577,38 +624,36 @@ main <- function() {
|
||||
|
||||
# Originally this filtered L NA's
|
||||
|
||||
# Filter data within 2SD
|
||||
# Filter data within and outside 2SD
|
||||
within_2sd_k <- stats_joined %>%
|
||||
filter(K >= (K_mean - 2 * K_sd) & K <= (K_mean + 2 * K_sd))
|
||||
|
||||
# Filter data outside 2SD
|
||||
outside_2sd_k <- stats_joined %>%
|
||||
filter(K < (K_mean - 2 * K_sd) | K > (K_mean + 2 * K_sd))
|
||||
|
||||
message("Calculating summary statistics for L within 2SD of K")
|
||||
# Summary statistics for within and outside 2SD of K
|
||||
l_within_2sd_k <- calculate_summary_stats(within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
|
||||
l_outside_2sd_k <- calculate_summary_stats(outside_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
|
||||
|
||||
# Write CSV files
|
||||
write.csv(l_within_2sd_k, file = file.path(out_dir_qc, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"), row.names = FALSE)
|
||||
write.csv(l_outside_2sd_k, file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_outside_2sd_k.csv"), row.names = FALSE)
|
||||
|
||||
message("Calculating summary statistics for L within 2SD of K")
|
||||
cols_to_remove <- names(l_within_2sd_k)
|
||||
cols_to_keep <- c("conc_num", "conc_num_factor")
|
||||
within_2sd_k_clean <- within_2sd_k %>%
|
||||
select(-all_of(setdiff(cols_to_remove, cols_to_keep)))
|
||||
l_within_2sd_k_joined <- within_2sd_k_clean %>%
|
||||
left_join(l_within_2sd_k, by = c("conc_num", "conc_num_factor"))
|
||||
write.csv(l_within_2sd_k,
|
||||
file = file.path(out_dir_qc, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"),
|
||||
row.names = FALSE)
|
||||
|
||||
|
||||
message("Calculating summary statistics for for L outside 2SD of K")
|
||||
l_outside_2sd_k <- calculate_summary_stats(outside_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
|
||||
cols_to_remove <- names(l_outside_2sd_k)
|
||||
cols_to_keep <- c("conc_num", "conc_num_factor")
|
||||
outside_2sd_k_clean <- outside_2sd_k %>%
|
||||
select(-all_of(setdiff(cols_to_remove, cols_to_keep)))
|
||||
l_outside_2sd_k_joined <- outside_2sd_k_clean %>%
|
||||
left_join(l_outside_2sd_k, by = c("conc_num", "conc_num_factor"))
|
||||
write.csv(l_outside_2sd_k,
|
||||
file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_outside_2sd_k.csv"),
|
||||
row.names = FALSE)
|
||||
|
||||
|
||||
# Process background strains
|
||||
bg_strains <- c("YDL227C")
|
||||
lapply(bg_strains, function(strain) {
|
||||
@@ -646,18 +691,14 @@ main <- function() {
|
||||
filter(OrfRep != strain) %>%
|
||||
mutate(SM = 0)
|
||||
|
||||
message("Processing reference strain")
|
||||
reference_strain <- process_strains(l_within_2sd_k_joined)
|
||||
message("Processing deletion strains")
|
||||
deletion_strains <- process_strains(l_within_2sd_k_joined)
|
||||
|
||||
# TODO we may need to add "num" to grouping vars
|
||||
|
||||
# Calculate interactions
|
||||
variables <- c("L", "K", "r", "AUC")
|
||||
message("Calculating reference interaction scores")
|
||||
reference_results <- calculate_interaction_scores(stats_joined, reference_strain, max_conc, variables)
|
||||
message("Calculating deletion interaction scores")
|
||||
deletion_results <- calculate_interaction_scores(stats_joined, deletion_strains, max_conc, variables)
|
||||
|
||||
zscores_calculations_reference <- reference_results$zscores_calculations
|
||||
@@ -666,11 +707,11 @@ main <- function() {
|
||||
zscores_interactions <- deletion_results$zscores_interactions
|
||||
|
||||
# TODO: I don't know if we need this?
|
||||
zscores_interactions <- zscores_interactions %>%
|
||||
arrange(desc(Z_lm_L)) %>%
|
||||
arrange(desc(NG))
|
||||
# zscores_interactions <- zscores_interactions %>%
|
||||
# arrange(desc(Z_lm_L)) %>%
|
||||
# arrange(desc(NG))
|
||||
|
||||
message("Writing zscores csv files")
|
||||
# Writing Z-Scores to file
|
||||
write.csv(zscores_calculations_reference, file = file.path(out_dir, "RF_ZScores_Calculations.csv"), row.names = FALSE)
|
||||
write.csv(zscores_calculations, file = file.path(out_dir, "ZScores_Calculations.csv"), row.names = FALSE)
|
||||
write.csv(zscores_interactions_reference, file = file.path(out_dir, "RF_ZScores_Interaction.csv"), row.names = FALSE)
|
||||
@@ -678,10 +719,11 @@ main <- function() {
|
||||
|
||||
# Define conditions for enhancers and suppressors
|
||||
# TODO Add to study config file?
|
||||
enhancer_condition_L <- zscores_interactions$Avg_Zscore_L >= 2
|
||||
suppressor_condition_L <- zscores_interactions$Avg_Zscore_L <= -2
|
||||
enhancer_condition_K <- zscores_interactions$Avg_Zscore_K >= 2
|
||||
suppressor_condition_K <- zscores_interactions$Avg_Zscore_K <= -2
|
||||
threshold <- 2
|
||||
enhancer_condition_L <- zscores_interactions$Avg_Zscore_L >= threshold
|
||||
suppressor_condition_L <- zscores_interactions$Avg_Zscore_L <= -threshold
|
||||
enhancer_condition_K <- zscores_interactions$Avg_Zscore_K >= threshold
|
||||
suppressor_condition_K <- zscores_interactions$Avg_Zscore_K <= -threshold
|
||||
|
||||
# Subset data
|
||||
enhancers_L <- zscores_interactions[enhancer_condition_L, ]
|
||||
@@ -707,10 +749,11 @@ main <- function() {
|
||||
file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv"), row.names = FALSE)
|
||||
|
||||
# Handle linear model based enhancers and suppressors
|
||||
enhancers_lm_L <- zscores_interactions[zscores_interactions$Z_lm_L >= 2, ]
|
||||
suppressors_lm_L <- zscores_interactions[zscores_interactions$Z_lm_L <= -2, ]
|
||||
enhancers_lm_K <- zscores_interactions[zscores_interactions$Z_lm_K >= 2, ]
|
||||
suppressors_lm_K <- zscores_interactions[zscores_interactions$Z_lm_K <= -2, ]
|
||||
lm_threshold <- 2
|
||||
enhancers_lm_L <- zscores_interactions[zscores_interactions$Z_lm_L >= lm_threshold, ]
|
||||
suppressors_lm_L <- zscores_interactions[zscores_interactions$Z_lm_L <= -lm_threshold, ]
|
||||
enhancers_lm_K <- zscores_interactions[zscores_interactions$Z_lm_K >= lm_threshold, ]
|
||||
suppressors_lm_K <- zscores_interactions[zscores_interactions$Z_lm_K <= -lm_threshold, ]
|
||||
|
||||
# Save linear model based enhancers and suppressors
|
||||
message("Writing linear model enhancer/suppressor csv files")
|
||||
@@ -724,9 +767,11 @@ main <- function() {
|
||||
file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_K_lm.csv"), row.names = FALSE)
|
||||
|
||||
# Generate plots for interaction scores
|
||||
generate_rf_plots(zscores_calculations_reference, zscores_interactions_reference, out_dir)
|
||||
generate_rf_plots(zscores_calculations, zscores_interactions, out_dir)
|
||||
|
||||
# generate_rf_plots(zscores_calculations_reference, zscores_interactions_reference, out_dir)
|
||||
# generate_rf_plots(zscores_calculations, zscores_interactions, out_dir)
|
||||
generate_and_save_plots(zscores_calculations_reference, out_dir, "Reference_Calculations", variables)
|
||||
generate_and_save_plots(zscores_calculations, out_dir, "Deletion_Calculations", variables)
|
||||
|
||||
# Apply filtering to remove NA values before further analysis
|
||||
zscores_interactions_filtered <- zscores_interactions %>%
|
||||
filter(!is.na(Z_lm_L) | !is.na(Avg_Zscore_L))
|
||||
|
||||
Reference in New Issue
Block a user