Improve generate_and_save_plots()

This commit is contained in:
2024-09-09 19:20:45 -04:00
parent 8f2f8d28f0
commit dc5501ea5d

View File

@@ -143,13 +143,10 @@ update_gene_names <- function(df, sgd_gene_list) {
# Create a named vector for mapping ORF to GeneName
gene_map <- setNames(genes$V5, genes$V4)
# Vectorized match to find the GeneName from gene_map
mapped_genes <- gene_map[df$ORF]
# Replace NAs in mapped_genes with original Gene names (preserves existing Gene names if ORF is not found)
updated_genes <- ifelse(is.na(mapped_genes) | df$OrfRep == "YDL227C", df$Gene, mapped_genes)
# Ensure Gene is not left blank or incorrectly updated to "OCT1"
df <- df %>%
mutate(Gene = ifelse(updated_genes == "" | updated_genes == "OCT1", OrfRep, updated_genes))
@@ -313,45 +310,76 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
return(list(zscores_calculations = interaction_scores_all, zscores_interactions = interaction_scores))
}
generate_plot <- function(df, x_var, y_var = NULL, plot_type = "scatter", color_var = "conc_num",
title = "", x_label = NULL, y_label = NULL, ylim_vals = NULL, annotations = NULL) {
generate_and_save_plots <- function(output_dir, file_name, plot_configs) {
plot <- ggplot(df, aes(x = !!sym(x_var), color = as.factor(!!sym(color_var))))
`%||%` <- function(a, b) if (!is.null(a)) a else b
if (!is.null(y_var)) {
plot <- plot + aes(y = !!sym(y_var))
# Generalized plot generation
plots <- lapply(plot_configs, function(config) {
# Add scatter points with summary stats if `y_var` is present
y_mean_col <- paste0("mean_", y_var)
y_sd_col <- paste0("sd_", y_var)
# Use the dataframe from the plot configuration
df <- config$df
plot <- plot + geom_point(shape = 3, size = 0.2) +
# Initialize the basic ggplot
plot <- ggplot(df, aes(x = if (config$plot_type == "box") as.factor(!!sym(config$x_var)) else !!sym(config$x_var),
color = as.factor(!!sym(config$color_var))))
# Handle specific y_var cases, like "delta_bg"
if (!is.null(config$y_var)) {
plot <- plot + aes(y = !!sym(config$y_var))
y_mean_col <- paste0("mean_", config$y_var)
y_sd_col <- paste0("sd_", config$y_var)
# Special case for "delta_bg" to add jitter
if (config$y_var == "delta_bg" && config$plot_type == "scatter") {
plot <- plot + geom_point(shape = 3, size = 0.2, position = "jitter") +
geom_errorbar(aes(
ymin = !!sym(y_mean_col) - !!sym(y_sd_col),
ymax = !!sym(y_mean_col) + !!sym(y_sd_col)),
width = 0.1) +
geom_point(aes(y = !!sym(y_mean_col)), size = 0.6)
} else if (config$error_bar %||% FALSE) {
plot <- plot +
geom_point(shape = 3, size = 0.2) +
geom_errorbar(aes(
ymin = !!sym(y_mean_col) - !!sym(y_sd_col),
ymax = !!sym(y_mean_col) + !!sym(y_sd_col)),
width = 0.1) +
geom_point(aes(y = !!sym(y_mean_col)), size = 0.6) +
stat_summary(aes(
ymin = !!sym(y_mean_col) - !!sym(y_sd_col),
ymax = !!sym(y_mean_col) + !!sym(y_sd_col)),
fun.data = "identity", geom = "errorbar", width = 0.1) +
stat_summary(aes(y = !!sym(y_mean_col)),
fun.data = "identity", geom = "point", size = 0.6)
}
}
# Add the specified plot type
plot <- switch(plot_type,
# Switch plot type (scatter, box, density, bar)
plot <- switch(config$plot_type,
"box" = plot + geom_boxplot(),
"density" = plot + geom_density(),
"bar" = plot + geom_bar(stat = "identity"),
plot + geom_point() + geom_smooth(method = "lm", se = FALSE) # Default is scatter
)
if (!is.null(ylim_vals)) {
plot <- plot + coord_cartesian(ylim = ylim_vals)
# Apply y-axis limits if provided
if (!is.null(config$ylim_vals)) {
plot <- plot + coord_cartesian(ylim = config$ylim_vals)
}
plot <- plot + ggtitle(title) + theme_publication()
# Add title and publication theme with custom legend position
legend_position <- config$legend_position %||% "bottom" # Default to bottom if not specified
plot <- plot + ggtitle(config$title) + theme_publication(legend_position = legend_position)
if (!is.null(x_label)) plot <- plot + xlab(x_label)
if (!is.null(y_label)) plot <- plot + ylab(y_label)
# Add x and y axis labels if provided
if (!is.null(config$x_label)) plot <- plot + xlab(config$x_label)
if (!is.null(config$y_label)) plot <- plot + ylab(config$y_label)
if (!is.null(annotations)) {
for (annotation in annotations) {
# Add custom annotations if provided
if (!is.null(config$annotations)) {
for (annotation in config$annotations) {
plot <- plot + annotate("text",
x = annotation$x,
y = annotation$y,
@@ -360,57 +388,24 @@ generate_plot <- function(df, x_var, y_var = NULL, plot_type = "scatter", color_
}
return(plot)
}
})
generate_and_save_plots <- function(df, output_dir, file_prefix, plot_configs) {
plots <- list()
if (nrow(df) == 0) {
message("The dataframe is empty, skipping plots")
return()
}
message("Generating plots for dataframe")
for (config in plot_configs) {
plot <- generate_plot(
df = df,
x_var = config$x_var,
y_var = config$y_var,
plot_type = config$plot_type,
title = config$title,
ylim_vals = config$ylim_vals,
annotations = config$annotations
)
plots[[paste0(config$y_var, "_", config$plot_type)]] <- plot
}
# Save plots to PDF and HTML
save_plots(file_prefix, plots, output_dir)
}
# Standardized saving of plots
save_plots <- function(file_name, plot_list, output_dir) {
# Save plots to PDF
pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9)
lapply(plot_list, print)
lapply(plots, print)
dev.off()
lapply(names(plot_list), function(plot_name) {
pgg <- tryCatch({
suppressWarnings(ggplotly(plot_list[[plot_name]]) %>%
# Convert plots to plotly and combine them using subplot
plotly_plots <- lapply(plots, function(plot) {
suppressWarnings(ggplotly(plot) %>%
layout(legend = list(orientation = "h")))
}, error = function(e) {
message("Error generating plot: ", plot_name)
return(NULL)
})
if (!is.null(pgg)) {
saveWidget(pgg,
file = file.path(output_dir, paste0(file_name, "_", plot_name, ".html")),
selfcontained = TRUE)
}
})
# Combine all plotly plots into a single interactive layout using subplot
combined_plot <- subplot(plotly_plots, nrows = length(plotly_plots), margin = 0.05)
# Save the combined HTML file
saveWidget(combined_plot, file = file.path(output_dir, paste0(file_name, ".html")), selfcontained = TRUE)
}
interaction_plot_configs <- function(df, variables) {
@@ -493,42 +488,6 @@ adjust_missing_and_rank <- function(df, variables) {
return(df)
}
generate_plots <- function(df, x_var, y_vars, plot_type, color_var = "conc_num", title_prefix = "",
x_label = NULL, y_label = NULL, ylim_vals = NULL, output_dir, file_prefix = "plot") {
plot_list <- list()
# Iterate over each y variable and generate the corresponding plot
for (var in y_vars) {
plot <- ggplot(df, aes(x = !!sym(x_var), y = !!sym(var), color = as.factor(!!sym(color_var)))) +
ggtitle(paste(title_prefix, var)) +
theme_publication()
if (plot_type == "scatter") {
plot <- plot +
geom_point() +
geom_smooth(method = "lm", formula = y ~ x, se = FALSE)
if (!is.null(ylim_vals)) {
plot <- plot + coord_cartesian(ylim = ylim_vals)
}
} else if (plot_type == "box") {
plot <- plot + geom_boxplot()
} else if (plot_type == "density") {
plot <- plot + geom_density()
} else if (plot_type == "bar") {
plot <- plot + geom_bar()
}
if (!is.null(x_label)) plot <- plot + xlab(x_label)
if (!is.null(y_label)) plot <- plot + ylab(y_label)
plot_list[[var]] <- plot
}
# Save plots to PDF and HTML
save_plots(file_prefix, plot_list, output_dir)
}
main <- function() {
lapply(names(args$experiments), function(exp_name) {
exp <- args$experiments[[exp_name]]
@@ -540,7 +499,7 @@ main <- function() {
dir.create(out_dir_qc, recursive = TRUE, showWarnings = FALSE)
# Load and process data
df <- load_and_process_data(args$easy_results_file, exp_sd)
df <- load_and_process_data(args$easy_results_file, sd = exp_sd)
df <- update_gene_names(df, args$sgd_gene_list)
max_conc <- max(df$conc_num_factor)
@@ -583,40 +542,223 @@ main <- function() {
df_na %>% filter(if_all(c(L), is.finite))
}
# Define the plot configurations for each QC step
qc_plot_configs <- list(
list(x_var = "L", y_var = "K", plot_type = "scatter", title = "Raw L vs K before QC", ylim_vals = NULL),
list(x_var = "delta_bg", y_var = NULL, plot_type = "density", title = "Density plot for Delta Background", ylim_vals = NULL),
list(x_var = "as.factor(delta_bg)", y_var = NULL, plot_type = "bar", title = "Bar plot for Delta Background", ylim_vals = NULL)
# Plot configurations
# Each list corresponds to a group of plots in the same file
raw_l_vs_k_plots <- list(
list(df = df, x_var = "L", y_var = "K", plot_type = "scatter",
title = "Raw L vs K before QC",
color_var = "conc_num",
legend_position = "right")
)
above_threshold_plots <- list(
list(df = df_above_tolerance, x_var = "L", y_var = "K", plot_type = "scatter",
title = paste("Raw L vs K for strains above delta background threshold of", df_above_tolerance$delta_bg_tolerance[[1]], "or above"),
color_var = "conc_num",
annotations = list(
list(
x = L_half_median,
y = K_half_median,
label = paste("Strains above delta background tolerance =", nrow(df_above_tolerance))
)
),
error_bar = FALSE,
legend_position = "right"
)
)
frequency_delta_bg_plots <- list(
list(df = df, x_var = "delta_bg", y_var = NULL, plot_type = "density",
title = "Density plot for Delta Background by Conc All Data",
color_var = "conc_num",
x_label = "Delta Background",
y_label = "Density",
error_bar = FALSE,
legend_position = "right"
),
list(df = df, x_var = "delta_bg", y_var = NULL, plot_type = "bar",
title = "Bar plot for Delta Background by Conc All Data",
color_var = "conc_num",
x_label = "Delta Background",
y_label = "Count",
error_bar = FALSE,
legend_position = "right"
)
)
plate_analysis_plots <- list()
for (plot_type in c("scatter", "box")) {
variables <- c("L", "K", "r", "AUC", "delta_bg")
for (var in variables) {
for (stage in c("before", "after")) {
if (stage == "before") {
df_plot <- df
} else {
df_plot <- df_na # TODO use df_na_filtered if necessary
}
# Set error_bar = TRUE only for scatter plots
error_bar <- ifelse(plot_type == "scatter", TRUE, FALSE)
# Create the plot configuration
plot_config <- list(
df = df_plot,
x_var = "scan",
y_var = var,
plot_type = plot_type,
title = paste("Plate analysis by Drug Conc for", var, stage, "quality control"),
error_bar = error_bar,
color_var = "conc_num"
)
plate_analysis_plots <- append(plate_analysis_plots, list(plot_config))
}
}
}
plate_analysis_no_zero_plots <- list()
for (plot_type in c("scatter", "box")) {
variables <- c("L", "K", "r", "AUC", "delta_bg")
for (var in variables) {
# Set error_bar = TRUE only for scatter plots
error_bar <- ifelse(plot_type == "scatter", TRUE, FALSE)
# Create the plot configuration
plot_config <- list(
df = df_no_zeros,
x_var = "scan",
y_var = var,
plot_type = plot_type,
title = paste("Plate analysis by Drug Conc for", var, "after quality control"),
error_bar = error_bar,
color_var = "conc_num"
)
plate_analysis_plots <- append(plate_analysis_plots, list(plot_config))
}
}
delta_bg_plot_configs <- list(
list(x_var = "delta_bg", y_var = NULL, plot_type = "density",
title = paste("Raw L vs K for strains above delta background threshold of", delta_bg_tolerance, "or above"),
ylim_vals = NULL,
annotate("text", x = L_half_median, y = K_half_median,
label = paste("Strains above delta background tolerance = ", nrow(df_above_tolerance)))
)
)
before_qc_configs <- list(
list(x_var = "scan", y_var = "delta_bg", plot_type = "scatter",
title = "Plate analysis by Drug Conc for Delta Background before QC",
error_bar = TRUE, color_var = "conc_num"),
list(x_var = "scan", y_var = "delta_bg", plot_type = "box",
title = "Plate analysis by Drug Conc for Delta Background before QC",
error_bar = FALSE, color_var = "conc_num")
)
above_delta_bg_tolerance <- list(
)
# Print quality control graphs before removing data due to contamination and
# adjusting missing data to max theoretical values
before_qc_plot_configs <- list(
# Plate analysis is a quality check to identify plate effects containing anomalies
)
# list(x_var = "delta_bg", y_var = NULL, plot_type = "density",
# title = "Density plot for Delta Background",
# ylim_vals = NULL),
# list(x_var = "as.factor(delta_bg)", y_var = NULL, plot_type = "bar",
# title = "Bar plot for Delta Background",
# ylim_vals = NULL)
above_tolerance_plot_configs <- list(
list(x_var = "L", y_var = "K", plot_type = "scatter", title = "L vs K for Rows Above Delta Background Threshold", ylim_vals = NULL)
list(x_var = "L", y_var = "K", plot_type = "scatter",
title = "Raw L vs K for strains above delta background threshold", ylim_vals = NULL)
)
after_qc_plot_configs <- list(
list(x_var = "L", y_var = "K", plot_type = "scatter", title = "L vs K After QC Filtering", ylim_vals = NULL)
list(x_var = "L", y_var = "K", plot_type = "scatter",
title = "L vs K After QC Filtering", ylim_vals = NULL)
)
no_zeros_plot_configs <- list(
list(x_var = "scan", y_var = "L", plot_type = "scatter", title = "L vs Scan for Non-Zero L Values"),
list(x_var = "scan", y_var = "K", plot_type = "scatter", title = "K vs Scan for Non-Zero L Values"),
list(x_var = "scan", y_var = "r", plot_type = "scatter", title = "r vs Scan for Non-Zero L Values"),
list(x_var = "scan", y_var = "AUC", plot_type = "scatter", title = "AUC vs Scan for Non-Zero L Values"),
list(x_var = "scan", y_var = "delta_bg", plot_type = "scatter", title = "Delta background vs Scan for Non-Zero L Values"),
list(x_var = "as.factor(scan)", y_var = "L", plot_type = "box", title = "L vs Scan for Non-Zero L Values"),
list(x_var = "as.factor(scan)", y_var = "K", plot_type = "box", title = "K vs Scan for Non-Zero L Values"),
list(x_var = "as.factor(scan)", y_var = "r", plot_type = "box", title = "r vs Scan for Non-Zero L Values"),
list(x_var = "as.factor(scan)", y_var = "AUC", plot_type = "box", title = "AUC vs Scan for Non-Zero L Values"),
list(x_var = "as.factor(scan)", y_var = "delta_bg", plot_type = "box", title = "Delta background vs Scan for Non-Zero L Values")
list(x_var = "scan", y_var = "L", plot_type = "scatter",
title = "Plate analysis by Drug Conc for L after quality control",
file_name = "Plate_Analysis_L_afterQC_Z", error_bar = TRUE, color_var = "conc_num"),
list(x_var = "scan", y_var = "K", plot_type = "scatter",
title = "Plate analysis by Drug Conc for K after quality control",
file_name = "Plate_Analysis_K_afterQC_Z", error_bar = TRUE, color_var = "conc_num"),
list(x_var = "scan", y_var = "r", plot_type = "scatter",
title = "Plate analysis by Drug Conc for r after quality control",
file_name = "Plate_Analysis_r_afterQC_Z", error_bar = TRUE, color_var = "conc_num"),
list(x_var = "scan", y_var = "AUC", plot_type = "scatter",
title = "Plate analysis by Drug Conc for AUC after quality control",
file_name = "Plate_Analysis_AUC_afterQC_Z", error_bar = TRUE, color_var = "conc_num"),
list(x_var = "scan", y_var = "delta_bg", plot_type = "scatter",
title = "Plate analysis by Drug Conc for Delta_Backgrd after quality control",
file_name = "Plate_Analysis_Delta_Backgrd_afterQC_Z", error_bar = TRUE, color_var = "conc_num"),
# Box plots
list(x_var = "as.factor(scan)", y_var = "L", plot_type = "box",
title = "Plate analysis by Drug Conc for L after quality control",
file_name = "Plate_Analysis_L_Box_afterQC_Z"),
list(x_var = "as.factor(scan)", y_var = "K", plot_type = "box",
title = "Plate analysis by Drug Conc for K after quality control",
file_name = "Plate_Analysis_K_Box_afterQC_Z"),
list(x_var = "as.factor(scan)", y_var = "r", plot_type = "box",
title = "Plate analysis by Drug Conc for r after quality control",
file_name = "Plate_Analysis_r_Box_afterQC_Z"),
list(x_var = "as.factor(scan)", y_var = "AUC", plot_type = "box",
title = "Plate analysis by Drug Conc for AUC after quality control",
file_name = "Plate_Analysis_AUC_Box_afterQC_Z"),
list(x_var = "as.factor(scan)", y_var = "delta_bg", plot_type = "box",
title = "Plate analysis by Drug Conc for Delta_Backgrd after quality control",
file_name = "Plate_Analysis_Delta_Backgrd_Box_afterQC_Z")
)
# Generate and save plots for each QC step
message("Generating QC plots")
generate_and_save_plots(df, out_dir_qc, "Before_QC", qc_plot_configs)
generate_and_save_plots(df_above_tolerance, out_dir_qc, "Above_Tolerance", above_tolerance_plot_configs)
generate_and_save_plots(df_na_filtered, out_dir_qc, "After_QC", after_qc_plot_configs)
generate_and_save_plots(df_no_zeros, out_dir_qc, "No_Zeros", no_zeros_plot_configs)
generate_and_save_plots(out_dir_qc, "L_vs_K_before_QC", raw_l_vs_k_plots)
generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots)
generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots)
generate_and_save_plots(out_dir_qc, "plate_analysis", plate_analysis_plots)
generate_and_save_plots(df, out_dir_qc, "raw_L_vs_K_before_QC.pdf", delta_bg_plots)
generate_and_save_plots(df, out_dir_qc, "plate_analysis", before_qc_plot_configs)
generate_and_save_plots(df_above_tolerance, out_dir_qc, above_tolerance_plot_configs)
generate_and_save_plots(df_na_filtered, out_dir_qc, after_qc_plot_configs)
generate_and_save_plots(df_no_zeros, out_dir_qc, "plate_analysis_no_zeros", no_zeros_plot_configs)
# Clean up
rm(df, df_above_tolerance, df_no_zeros)
@@ -713,9 +855,9 @@ main <- function() {
# Create interaction plots
deletion_plot_configs <- interaction_plot_configs(df_reference, variables)
generate_and_save_plots(zscores_calculations, out_dir, "RF_InteractionPlots", deletion_plot_configs)
generate_and_save_plots(zscores_calculations, out_dir, deletion_plot_configs)
deletion_plot_configs <- interaction_plot_configs(df_deletion, variables)
generate_and_save_plots(zscores_calculations, out_dir, "InteractionPlots", deletion_plot_configs)
generate_and_save_plots(zscores_calculations, out_dir, deletion_plot_configs)
# Define conditions for enhancers and suppressors
# TODO Add to study config file?
@@ -766,14 +908,6 @@ main <- function() {
write.csv(suppressors_lm_K,
file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_K_lm.csv"), row.names = FALSE)
# Interaction plots for reference strain
variables <- c("L", "K", "r", "AUC")
reference_plot_configs <- interaction_plot_configs(df_reference, variables)
generate_and_save_plots(zscores_calculation_reference, out_dir, "RF_InteractionPlots", reference_plot_configs)
deletion_plot_configs <- interaction_plot_configs(df_deletion, variables)
generate_and_save_plots(zscores_calculations, out_dir, "InteractionPlots", deletion_plot_configs)
# Correlation plots
lm_list <- list(
lm(Z_lm_K ~ Z_lm_L, data = zscores_interactions_filtered),
@@ -786,7 +920,7 @@ main <- function() {
lm_summaries <- lapply(lm_list, summary)
correlation_plot_config <- correlation_plot_configs(zscores_interactions_filtered, lm_list, lm_summaries)
generate_and_save_plots(zscores_interactions_filtered, output_dir, "CorrelationPlots", correlation_plot_config)
generate_and_save_plots(zscores_interactions_filtered, output_dir, correlation_plot_config)
# Generate ranked plots
rank_plot_config <- list(
@@ -796,7 +930,7 @@ main <- function() {
list(x_var = "AUC_Rank", y_var = "Avg_Zscore_AUC", plot_type = "scatter", title = "Rank vs Avg Z score for AUC")
)
# Generate and save rank plots using the existing plotting framework
generate_and_save_plots(zscores_interactions_filtered, output_dir, "RankPlots", rank_plot_config)
generate_and_save_plots(zscores_interactions_filtered, output_dir, rank_plot_config)
})
})
}