suppressMessages({ library("ggplot2") library("plotly") library("htmlwidgets") library("dplyr") library("ggthemes") }) # Constants for configuration PLOT_WIDTH <- 14 PLOT_HEIGHT <- 9 BASE_SIZE <- 14 # BACKUP_SUFFIX <- paste0(".backup_", Sys.Date()) options(warn = 2, max.print = 100) # Function to parse arguments parse_arguments <- function() { if (interactive()) { args <- c( "1", "3", "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/StudyInfo.csv", "/home/bryan/documents/develop/scripts/hartmanlab/workflow/apps/r/SGD_features.tab", "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/easy/20240116_jhartman2/results_std.txt", "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/Exp1/zscores" ) } else { args <- commandArgs(trailingOnly = TRUE) } list( exp_number = as.numeric(args[1]), delta_bg_factor = as.numeric(args[2]), study_info_file = normalizePath(file.path(args[3]), mustWork = FALSE), sgd_gene_list = normalizePath(file.path(args[4]), mustWork = FALSE), input_file = normalizePath(file.path(args[5]), mustWork = FALSE), out_dir = normalizePath(file.path(args[6]), mustWork = FALSE), out_dir_qc = normalizePath(file.path(args[6], "qc"), mustWork = FALSE) ) } args <- parse_arguments() dir.create(args$out_dir, showWarnings = FALSE) dir.create(args$out_dir_qc, showWarnings = FALSE) # Define themes and scales theme_publication <- function(base_size = BASE_SIZE, base_family = "sans") { theme_foundation(base_size = base_size, base_family = base_family) + theme( plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5), text = element_text(), panel.background = element_rect(colour = NA), plot.background = element_rect(colour = NA), panel.border = element_rect(colour = NA), axis.title = element_text(face = "bold", size = rel(1)), axis.title.y = element_text(angle = 90, vjust = 2), axis.title.x = element_text(vjust = -0.2), axis.line = element_line(colour = "black"), panel.grid.major = element_line(colour = "#f0f0f0"), panel.grid.minor = element_blank(), legend.key = element_rect(colour = NA), legend.position = "bottom", legend.direction = "horizontal", plot.margin = unit(c(10, 5, 5, 5), "mm"), strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"), strip.text = element_text(face = "bold") ) } theme_publication_legend_right <- function(base_size = BASE_SIZE, base_family = "sans") { theme_publication(base_size, base_family) + theme( legend.position = "right", legend.direction = "vertical", legend.key.size = unit(0.5, "cm"), legend.spacing = unit(0, "cm"), legend.title = element_text(face = "italic") ) } scale_fill_publication <- function(...) { discrete_scale("fill", "Publication", manual_pal(values = c( "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33" )), ...) } scale_colour_publication <- function(...) { discrete_scale("colour", "Publication", manual_pal(values = c( "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33" )), ...) } # Function to load and preprocess data load_and_preprocess_data <- function(input_file) { df <- tryCatch({ read.delim(input_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE) }, error = function(e) { stop("Error reading file: ", input_file, "\n", e$message) }) df <- df[!df[[1]] %in% c("", "Scan"), ] numeric_columns <- c("Col", "Row", "l", "K", "r", "Scan", "AUC96", "LstBackgrd", "X1stBackgrd") df[numeric_columns[numeric_columns %in% colnames(df)]] <- lapply(df[numeric_columns[numeric_columns %in% colnames(df)]], as.numeric) df$L <- if ("l" %in% colnames(df)) df$l else {warning("Missing column: l"); NA} df$AUC <- if ("AUC96" %in% colnames(df)) df$AUC96 else {warning("Missing column: AUC96"); NA} df$conc_num <- if ("Conc" %in% colnames(df)) as.numeric(gsub("[^0-9\\.]", "", df$Conc)) else NA df$delta_bg <- if (all(c("X1stBackgrd", "LstBackgrd") %in% colnames(df))) df$LstBackgrd - df$X1stBackgrd else {warning("Missing columns for delta_bg calculation"); NA} if (nrow(df) == 0) warning("Dataframe is empty after filtering") df } df <- load_and_preprocess_data(args$input_file) delta_bg_tolerance <- mean(df$delta_bg, na.rm = TRUE) + args$delta_bg_factor * sd(df$delta_bg, na.rm = TRUE) message("Exp", args$exp_number, ": The delta_bg_factor is: ", args$delta_bg_factor) message("Exp", args$exp_number, ": The delta_bg_tolerance is ", delta_bg_tolerance) df <- df %>% mutate(OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep)) %>% filter(!Gene %in% c("BLANK", "Blank", "blank"), Drug != "BMH21") # Function to create plot create_plot <- function(df, var, plot_type) { filtered_df <- df %>% filter(is.finite(.data[[var]])) p <- ggplot(filtered_df, aes(Scan, .data[[var]], color = as.factor(conc_num))) + ggtitle(paste("Plate analysis by Drug Conc for", var, "before quality control")) + theme_publication() if (plot_type == "scatter") { p <- p + geom_point(shape = 3, size = 0.2, position = "jitter") + stat_summary(fun = mean, geom = "point", size = 0.6) + stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), geom = "errorbar") } else { p <- p + geom_boxplot() } return(p) } # Function to publish plot to PDF and HTML (Plotly) publish_plot <- function(plot, plot_path) { # if (file.exists(plot_path)) { # file.rename(plot_path, paste0(plot_path, BACKUP_SUFFIX)) # } pdf(plot_path, width = PLOT_WIDTH, height = PLOT_HEIGHT) print(plot) dev.off() pgg <- suppressWarnings(ggplotly(plot, tooltip = c("L", "K", "ORF", "Gene", "delta_bg")) %>% layout(legend = list(orientation = "h"))) saveWidget(pgg, sub(".pdf$", ".html", plot_path), selfcontained = TRUE) } # Function to publish multiple plots publish_multiple_plots <- function(df, variables, plot_type, out_dir_qc, suffix = "") { lapply(variables, function(var) { plot <- create_plot(df, var, plot_type) publish_plot(plot, file.path(out_dir_qc, paste0("plate_analysis_", var, suffix, ".pdf"))) }) } # Function to calculate and publish summary statistics publish_summary_stats <- function(df, variables, out_dir) { stats_list <- lapply(variables, function(var) { df %>% group_by(OrfRep, conc_num) %>% summarize( N = n(), mean_val = mean(.data[[var]], na.rm = TRUE), sd_val = sd(.data[[var]], na.rm = TRUE), se_val = sd_val / sqrt(N) ) }) summary_stats_df <- bind_rows(stats_list, .id = "variable") write.csv(summary_stats_df, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE) } # Function to calculate and publish interaction scores publish_interaction_scores <- function(df, out_dir) { interaction_scores <- df %>% group_by(OrfRep) %>% summarize( mean_L = mean(L, na.rm = TRUE), mean_K = mean(K, na.rm = TRUE), mean_r = mean(r, na.rm = TRUE), mean_AUC = mean(AUC, na.rm = TRUE), delta_bg_mean = mean(delta_bg, na.rm = TRUE), delta_bg_sd = sd(delta_bg, na.rm = TRUE) ) %>% mutate( l_rank = rank(mean_L), k_rank = rank(mean_K), r_rank = rank(mean_r), auc_rank = rank(mean_AUC) ) write.csv(interaction_scores, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE) write.csv(interaction_scores %>% arrange(l_rank, k_rank), file.path(out_dir, "rf_zscores_interaction_ranked.csv"), row.names = FALSE) } # Function to publish z-scores publish_zscores <- function(df, out_dir) { zscores <- df %>% mutate( zscore_L = scale(L, center = TRUE, scale = TRUE), zscore_K = scale(K, center = TRUE, scale = TRUE), zscore_r = scale(r, center = TRUE, scale = TRUE), zscore_AUC = scale(AUC, center = TRUE, scale = TRUE) ) write.csv(zscores, file.path(out_dir, "zscores_interaction.csv"), row.names = FALSE) } # QC plot generation and publication generate_and_publish_qc <- function(df, delta_bg_tolerance, out_dir_qc) { publish_multiple_plots(df, c("L", "K", "r", "AUC", "delta_bg"), "scatter", out_dir_qc) delta_bg_above_tolerance <- df[df$delta_bg >= delta_bg_tolerance, ] publish_multiple_plots(delta_bg_above_tolerance, c("L", "K", "r", "AUC", "delta_bg"), "scatter", out_dir_qc, "_above_tolerance") } # Run QC plot generation and publication generate_and_publish_qc(df, delta_bg_tolerance, args$out_dir_qc) # Run summary statistics, interaction score, and z-score calculations and publications variables <- c("L", "K", "r", "AUC", "delta_bg") publish_summary_stats(df, variables, args$out_dir) publish_interaction_scores(df, args$out_dir) publish_zscores(df, args$out_dir)