Refactor grid plots

This commit is contained in:
2024-09-09 23:21:22 -04:00
parent 1f682c9cbc
commit 356b47106e

View File

@@ -119,8 +119,7 @@ load_and_process_data <- function(easy_results_file, sd = 3) {
# Rename columns
rename(L = l, num = Num., AUC = AUC96, scan = Scan, last_bg = LstBackgrd, first_bg = X1stBackgrd) %>%
mutate(
across(c(Col, Row, num), as.numeric),
across(c(L, K, r, scan, AUC, last_bg, first_bg), as.numeric),
across(c(Col, Row, num, L, K, r, scan, AUC, last_bg, first_bg), as.numeric),
delta_bg = last_bg - first_bg,
delta_bg_tolerance = mean(delta_bg, na.rm = TRUE) + (sd * sd(delta_bg, na.rm = TRUE)),
NG = if_else(L == 0 & !is.na(L), 1, 0),
@@ -300,98 +299,100 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
}
generate_and_save_plots <- function(output_dir, file_name, plot_configs) {
# Main function
generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_layout = NULL) {
`%||%` <- function(a, b) if (!is.null(a)) a else b
# Generalized plot generation
plots <- lapply(plot_configs, function(config) {
# Use the dataframe from the plot configuration
df <- config$df
# Initialize the basic ggplot
plot <- ggplot(df, aes(x = if (config$plot_type == "box") as.factor(!!sym(config$x_var)) else !!sym(config$x_var),
color = as.factor(!!sym(config$color_var))))
# Handle specific y_var cases, like "delta_bg"
if (!is.null(config$y_var)) {
plot <- plot + aes(y = !!sym(config$y_var))
y_mean_col <- paste0("mean_", config$y_var)
y_sd_col <- paste0("sd_", config$y_var)
# Special case for "delta_bg" to add jitter
if (config$y_var == "delta_bg" && config$plot_type == "scatter") {
plot <- plot + geom_point(shape = 3, size = 0.2, position = "jitter") +
geom_errorbar(aes(
ymin = !!sym(y_mean_col) - !!sym(y_sd_col),
ymax = !!sym(y_mean_col) + !!sym(y_sd_col)),
width = 0.1) +
geom_errorbar(aes(ymin = !!sym(y_mean_col) - !!sym(y_sd_col),
ymax = !!sym(y_mean_col) + !!sym(y_sd_col)), width = 0.1) +
geom_point(aes(y = !!sym(y_mean_col)), size = 0.6)
} else if (config$error_bar %||% FALSE) {
# Directly use geom_point and geom_errorbar with pre-calculated values
plot <- plot +
geom_point(shape = 3, size = 0.2) +
geom_errorbar(aes(
ymin = !!sym(y_mean_col) - !!sym(y_sd_col),
ymax = !!sym(y_mean_col) + !!sym(y_sd_col)),
width = 0.1) +
geom_errorbar(aes(ymin = !!sym(y_mean_col) - !!sym(y_sd_col),
ymax = !!sym(y_mean_col) + !!sym(y_sd_col)), width = 0.1) +
geom_point(aes(y = !!sym(y_mean_col)), size = 0.6)
}
}
# Switch plot type (scatter, box, density, bar)
plot <- switch(config$plot_type,
"box" = plot + geom_boxplot(),
"density" = plot + geom_density(),
"bar" = plot + geom_bar(stat = "identity"),
plot + geom_point() + geom_smooth(method = "lm", se = FALSE) # Default is scatter
)
plot + geom_point() + geom_smooth(method = "lm", se = FALSE))
# Apply y-axis limits if provided
if (!is.null(config$ylim_vals)) {
plot <- plot + coord_cartesian(ylim = config$ylim_vals)
}
# Add title and publication theme with custom legend position
legend_position <- config$legend_position %||% "bottom" # Default to bottom if not specified
legend_position <- config$legend_position %||% "bottom"
plot <- plot + ggtitle(config$title) + theme_publication(legend_position = legend_position)
# Add x and y axis labels if provided
if (!is.null(config$x_label)) plot <- plot + xlab(config$x_label)
if (!is.null(config$y_label)) plot <- plot + ylab(config$y_label)
# Add custom annotations if provided
if (!is.null(config$annotations)) {
for (annotation in config$annotations) {
plot <- plot + annotate("text",
x = annotation$x,
y = annotation$y,
label = annotation$label)
plot <- plot + annotate("text", x = annotation$x, y = annotation$y, label = annotation$label)
}
}
return(plot)
})
# Save plots to PDF
# If grid_layout is provided, arrange plots in a grid and save in a single PDF
if (!is.null(grid_layout)) {
pdf(file.path(output_dir, paste0(file_name, "_grid.pdf")), width = 14, height = 9)
# Loop through plots in chunks defined by ncol and nrow
for (start_idx in seq(1, length(plots), by = grid_layout$ncol * grid_layout$nrow)) {
end_idx <- min(start_idx + grid_layout$ncol * grid_layout$nrow - 1, length(plots))
plot_subset <- plots[start_idx:end_idx]
# Arrange plots in a grid
do.call(grid.arrange, c(plot_subset, ncol = grid_layout$ncol, nrow = grid_layout$nrow))
}
dev.off()
# Save as HTML (optional)
plotly_plots <- lapply(plots, function(plot) {
suppressWarnings(ggplotly(plot) %>% layout(legend = list(orientation = "h")))
})
combined_plot <- subplot(plotly_plots, nrows = grid_layout$nrow, margin = 0.05)
saveWidget(combined_plot, file = file.path(output_dir, paste0(file_name, "_grid.html")), selfcontained = TRUE)
} else {
# Save individual plots to PDF
pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9)
lapply(plots, print)
dev.off()
# Convert plots to plotly and combine them using subplot
# Convert plots to plotly and save as HTML
plotly_plots <- lapply(plots, function(plot) {
suppressWarnings(ggplotly(plot) %>%
layout(legend = list(orientation = "h")))
suppressWarnings(ggplotly(plot) %>% layout(legend = list(orientation = "h")))
})
# Combine all plotly plots into a single interactive layout using subplot
combined_plot <- subplot(plotly_plots, nrows = length(plotly_plots), margin = 0.05)
# Save the combined HTML file
saveWidget(combined_plot, file = file.path(output_dir, paste0(file_name, ".html")), selfcontained = TRUE)
}
}
interaction_plot_configs <- function(df, variables) {
plot_configs <- list()
@@ -428,18 +429,18 @@ interaction_plot_configs <- function(df, variables) {
# Append a new plot configuration for each variable
plot_configs[[length(plot_configs) + 1]] <- list(
df = df,
x_var = "Conc_Num_Factor",
x_var = "conc_num_factor",
y_var = delta_var,
plot_type = "scatter",
title = paste(df$OrfRep[1], df$Gene[1], sep = " "),
title = sprintf("%s %s", df$OrfRep[1], df$Gene[1]),
ylim_vals = ylim_vals,
annotations = annotations,
error_bar = list(
ymin = 0 - (2 * df[[wt_sd_col]][1]),
ymax = 0 + (2 * df[[wt_sd_col]][1])
),
x_breaks = unique(df$Conc_Num_Factor),
x_labels = unique(as.character(df$Conc_Num)),
x_breaks = unique(df$conc_num_factor),
x_labels = unique(as.character(df$conc_num)),
x_label = unique(df$Drug[1])
)
}
@@ -447,9 +448,6 @@ interaction_plot_configs <- function(df, variables) {
return(plot_configs)
}
correlation_plot_configs <- function(df, lm_list, lm_summaries) {
lapply(seq_along(lm_list), function(i) {
r_squared <- round(lm_summaries[[i]]$r.squared, 3)
@@ -555,6 +553,9 @@ main <- function() {
# Plot configurations
# Each list corresponds to a group of plots in the same file
# Print quality control graphs before removing data due to contamination and
# adjusting missing data to max theoretical values
l_vs_k_plots <- list(
list(df = df, x_var = "L", y_var = "K", plot_type = "scatter",
title = "Raw L vs K before QC",
@@ -672,74 +673,6 @@ main <- function() {
generate_and_save_plots(out_dir_qc, "L_vs_K_for_strains_2SD_outside_mean_K", l_outside_2sd_k_plots)
generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots)
# Print quality control graphs before removing data due to contamination and
# adjusting missing data to max theoretical values
# list(x_var = "delta_bg", y_var = NULL, plot_type = "density",
# title = "Density plot for Delta Background",
# ylim_vals = NULL),
# list(x_var = "as.factor(delta_bg)", y_var = NULL, plot_type = "bar",
# title = "Bar plot for Delta Background",
# ylim_vals = NULL)
above_tolerance_plot_configs <- list(
list(x_var = "L", y_var = "K", plot_type = "scatter",
title = "Raw L vs K for strains above delta background threshold", ylim_vals = NULL)
)
after_qc_plot_configs <- list(
list(x_var = "L", y_var = "K", plot_type = "scatter",
title = "L vs K After QC Filtering", ylim_vals = NULL)
)
no_zeros_plot_configs <- list(
list(x_var = "scan", y_var = "L", plot_type = "scatter",
title = "Plate analysis by Drug Conc for L after quality control",
file_name = "Plate_Analysis_L_afterQC_Z", error_bar = TRUE, color_var = "conc_num"),
list(x_var = "scan", y_var = "K", plot_type = "scatter",
title = "Plate analysis by Drug Conc for K after quality control",
file_name = "Plate_Analysis_K_afterQC_Z", error_bar = TRUE, color_var = "conc_num"),
list(x_var = "scan", y_var = "r", plot_type = "scatter",
title = "Plate analysis by Drug Conc for r after quality control",
file_name = "Plate_Analysis_r_afterQC_Z", error_bar = TRUE, color_var = "conc_num"),
list(x_var = "scan", y_var = "AUC", plot_type = "scatter",
title = "Plate analysis by Drug Conc for AUC after quality control",
file_name = "Plate_Analysis_AUC_afterQC_Z", error_bar = TRUE, color_var = "conc_num"),
list(x_var = "scan", y_var = "delta_bg", plot_type = "scatter",
title = "Plate analysis by Drug Conc for Delta_Backgrd after quality control",
file_name = "Plate_Analysis_Delta_Backgrd_afterQC_Z", error_bar = TRUE, color_var = "conc_num"),
# Box plots
list(x_var = "as.factor(scan)", y_var = "L", plot_type = "box",
title = "Plate analysis by Drug Conc for L after quality control",
file_name = "Plate_Analysis_L_Box_afterQC_Z"),
list(x_var = "as.factor(scan)", y_var = "K", plot_type = "box",
title = "Plate analysis by Drug Conc for K after quality control",
file_name = "Plate_Analysis_K_Box_afterQC_Z"),
list(x_var = "as.factor(scan)", y_var = "r", plot_type = "box",
title = "Plate analysis by Drug Conc for r after quality control",
file_name = "Plate_Analysis_r_Box_afterQC_Z"),
list(x_var = "as.factor(scan)", y_var = "AUC", plot_type = "box",
title = "Plate analysis by Drug Conc for AUC after quality control",
file_name = "Plate_Analysis_AUC_Box_afterQC_Z"),
list(x_var = "as.factor(scan)", y_var = "delta_bg", plot_type = "box",
title = "Plate analysis by Drug Conc for Delta_Backgrd after quality control",
file_name = "Plate_Analysis_Delta_Backgrd_Box_afterQC_Z")
)
# generate_and_save_plots(df, out_dir_qc, "raw_L_vs_K_before_QC.pdf", delta_bg_plots)
# generate_and_save_plots(df, out_dir_qc, "plate_analysis", before_qc_plot_configs)
# generate_and_save_plots(df_above_tolerance, out_dir_qc, above_tolerance_plot_configs)
# generate_and_save_plots(df_na_filtered, out_dir_qc, after_qc_plot_configs)
# generate_and_save_plots(df_no_zeros, out_dir_qc, "plate_analysis_no_zeros", no_zeros_plot_configs)
# Clean up
rm(df, df_above_tolerance, df_no_zeros)
@@ -774,9 +707,6 @@ main <- function() {
file = file.path(out_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")),
row.names = FALSE)
#print("Background summary stats:")
#print(head(summary_stats_bg))
# Filter reference and deletion strains
# Formerly X2_RF (reference strains)
df_reference <- df_na_stats %>%
@@ -831,10 +761,10 @@ main <- function() {
write.csv(zscores_interactions, file = file.path(out_dir, "ZScores_Interaction.csv"), row.names = FALSE)
# Create interaction plots
deletion_plot_configs <- interaction_plot_configs(df_reference, variables)
generate_and_save_plots(zscores_calculations, out_dir, deletion_plot_configs)
reference_plot_configs <- interaction_plot_configs(df_reference, variables)
deletion_plot_configs <- interaction_plot_configs(df_deletion, variables)
generate_and_save_plots(zscores_calculations, out_dir, deletion_plot_configs)
generate_and_save_plots(out_dir, "RF_interactionPlots", reference_plot_configs, grid_layout = list(ncol = 4, nrow = 3))
generate_and_save_plots(out_dir, "InteractionPlots", deletion_plot_configs, grid_layout = list(ncol = 4, nrow = 3))
# Define conditions for enhancers and suppressors
# TODO Add to study config file?