diff --git a/workflow/.old/apps/r/calculate_interaction_zscores.R b/workflow/.old/apps/r/calculate_interaction_zscores.R new file mode 100644 index 00000000..6e0224f6 --- /dev/null +++ b/workflow/.old/apps/r/calculate_interaction_zscores.R @@ -0,0 +1,353 @@ +suppressMessages({ + if (!require("ggplot2")) stop("Package ggplot2 is required but not installed.") + if (!require("plotly")) stop("Package plotly is required but not installed.") + if (!require("htmlwidgets")) stop("Package htmlwidgets is required but not installed.") + if (!require("dplyr")) stop("Package dplyr is required but not installed.") + if (!require("ggthemes")) stop("Package ggthemes is required but not installed.") + if (!require("plyr")) stop("Package plyr is required but not installed.") +}) + +# Constants for configuration +PLOT_WIDTH <- 14 +PLOT_HEIGHT <- 9 +BASE_SIZE <- 14 + +options(warn = 2, max.print = 100) + +parse_arguments <- function() { + args <- if (interactive()) { + c( + "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240116_jhartman2_DoxoHLD", + 3, + "/home/bryan/documents/develop/scripts/hartmanlab/workflow/apps/r/SGD_features.tab", + "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/easy/20240116_jhartman2_DoxoHLD/results_std.txt", + "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp1", + "Experiment 1: Doxo versus HLD", + "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp2", + "Experiment 2: HLD versus Doxo" + ) + } else { + commandArgs(trailingOnly = TRUE) + } + + paths <- normalizePath(file.path(args[seq(5, length(args), by = 2)]), mustWork = FALSE) + names <- args[seq(6, length(args), by = 2)] + experiments <- setNames(paths, names) + + list( + out_dir = normalizePath(file.path(args[1]), mustWork = FALSE), + sd = as.numeric(args[2]), + sgd_gene_list = normalizePath(file.path(args[3]), mustWork = FALSE), + easy_results_file = normalizePath(file.path(args[4]), mustWork = FALSE), + experiments = experiments + ) +} + +args <- parse_arguments() + +dir.create(args$out_dir, showWarnings = FALSE) + +# Define themes and scales +theme_publication <- function(base_size = BASE_SIZE, base_family = "sans") { + theme_foundation(base_size = base_size, base_family = base_family) + + theme( + plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5), + text = element_text(), + panel.background = element_rect(colour = NA), + plot.background = element_rect(colour = NA), + panel.border = element_rect(colour = NA), + axis.title = element_text(face = "bold", size = rel(1)), + axis.title.y = element_text(angle = 90, vjust = 2), + axis.title.x = element_text(vjust = -0.2), + axis.line = element_line(colour = "black"), + panel.grid.major = element_line(colour = "#f0f0f0"), + panel.grid.minor = element_blank(), + legend.key = element_rect(colour = NA), + legend.position = "bottom", + legend.direction = "horizontal", + plot.margin = unit(c(10, 5, 5, 5), "mm"), + strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"), + strip.text = element_text(face = "bold") + ) +} + +theme_publication_legend_right <- function(base_size = BASE_SIZE, base_family = "sans") { + theme_publication(base_size, base_family) + + theme( + legend.position = "right", + legend.direction = "vertical", + legend.key.size = unit(0.5, "cm"), + legend.spacing = unit(0, "cm"), + legend.title = element_text(face = "italic") + ) +} + +scale_fill_publication <- function(...) { + discrete_scale("fill", "Publication", manual_pal(values = c( + "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", + "#a6cee3", "#fb9a99", "#984ea3", "#ffff33" + )), ...) +} + +scale_colour_publication <- function(...) { + discrete_scale("colour", "Publication", manual_pal(values = c( + "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", + "#a6cee3", "#fb9a99", "#984ea3", "#ffff33" + )), ...) +} + +# Load SGD gene list +sgd_genes <- function(sgd_gene_list) { + read.delim(file = sgd_gene_list, quote = "", header = FALSE, + colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11))) %>% + dplyr::rename(ORF = V4, GeneName = V5) +} + +genes <- sgd_genes(args$sgd_gene_list) + +load_and_preprocess_data <- function(easy_results_file, genes) { + df <- tryCatch({ + read.delim(easy_results_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE) + }, error = function(e) { + stop("Error reading file: ", easy_results_file, "\n", e$message) + }) %>% + filter(!.[[1]] %in% c("", "Scan")) + + numeric_columns <- c("Col", "Row", "l", "K", "r", "Scan", "AUC96", "LstBackgrd", "X1stBackgrd") + df[numeric_columns[numeric_columns %in% colnames(df)]] <- + lapply(df[numeric_columns[numeric_columns %in% colnames(df)]], as.numeric) + + df <- df %>% + mutate( + L = if ("l" %in% colnames(.)) l else {warning("Missing column: l"); NA}, + AUC = if ("AUC96" %in% colnames(.)) AUC96 else {warning("Missing column: AUC96"); NA}, + conc_num = if ("Conc" %in% colnames(.)) as.numeric(gsub("[^0-9\\.]", "", Conc)) else NA, + delta_bg = if (all(c("X1stBackgrd", "LstBackgrd") %in% colnames(.))) + LstBackgrd - X1stBackgrd else {warning("Missing columns for delta_bg calculation"); NA}, + GeneName = vapply(ORF, function(orf) { + gene_name <- genes %>% filter(ORF == orf) %>% pull(GeneName) + ifelse(is.null(gene_name) || gene_name == "" || length(gene_name) == 0, orf, gene_name) + }, character(1)) + ) + + if (nrow(df) == 0) warning("Dataframe is empty after filtering") + + return(df) +} + +create_and_publish_plot <- function(df, var, plot_type, out_dir_qc, suffix = "") { + plot_func <- if (plot_type == "scatter") geom_point else geom_boxplot + filtered_df <- filter(df, is.finite(.data[[var]])) + + p <- ggplot(filtered_df, aes(Scan, .data[[var]], color = as.factor(conc_num))) + + plot_func(shape = 3, size = 0.2, position = "jitter") + + stat_summary(fun = mean, geom = "point", size = 0.6) + + stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), geom = "errorbar") + + ggtitle(paste("Plate analysis by Drug Conc for", var, "before quality control")) + + theme_publication() + + pdf_path <- file.path(out_dir_qc, paste0("plate_analysis_", var, suffix, ".pdf")) + pdf(pdf_path, width = PLOT_WIDTH, height = PLOT_HEIGHT) + print(p) + dev.off() + + html_path <- sub(".pdf$", ".html", pdf_path) + pgg <- suppressWarnings(ggplotly(p, tooltip = c("L", "K", "ORF", "Gene", "delta_bg", "GeneName")) %>% + layout(legend = list(orientation = "h"))) + saveWidget(pgg, html_path, selfcontained = TRUE) +} + +publish_summary_stats <- function(df, variables, out_dir) { + stats_list <- lapply(variables, function(var) { + df %>% + dplyr::group_by(OrfRep, conc_num) %>% + dplyr::summarize( + N = dplyr::n(), + mean_val = mean(.data[[var]], na.rm = TRUE), + sd_val = sd(.data[[var]], na.rm = TRUE), + se_val = sd_val / sqrt(N) + ) + }) + summary_stats_df <- dplyr::bind_rows(stats_list, .id = "variable") + write.csv(summary_stats_df, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE) +} + +publish_interaction_scores <- function(df, out_dir) { + interaction_scores <- df %>% + group_by(OrfRep) %>% + summarize( + mean_L = mean(L, na.rm = TRUE), + mean_K = mean(K, na.rm = TRUE), + mean_r = mean(r, na.rm = TRUE), + mean_AUC = mean(AUC, na.rm = TRUE), + delta_bg_mean = mean(delta_bg, na.rm = TRUE), + delta_bg_sd = sd(delta_bg, na.rm = TRUE) + ) %>% + mutate( + l_rank = rank(mean_L), + k_rank = rank(mean_K), + r_rank = rank(mean_r), + auc_rank = rank(mean_AUC) + ) + + write.csv(interaction_scores, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE) + write.csv(arrange(interaction_scores, l_rank, k_rank), + file.path(out_dir, "rf_zscores_interaction_ranked.csv"), row.names = FALSE) +} + +publish_zscores <- function(df, out_dir) { + zscores <- df %>% + mutate( + zscore_L = scale(L, center = TRUE, scale = TRUE), + zscore_K = scale(K, center = TRUE, scale = TRUE), + zscore_r = scale(r, center = TRUE, scale = TRUE), + zscore_AUC = scale(AUC, center = TRUE, scale = TRUE) + ) + + write.csv(zscores, file.path(out_dir, "zscores_interaction.csv"), row.names = FALSE) +} + +generate_and_publish_qc <- function(df, delta_bg_tolerance, out_dir_qc) { + variables <- c("L", "K", "r", "AUC", "delta_bg") + lapply(variables, create_and_publish_plot, df = df, plot_type = "scatter", out_dir_qc = out_dir_qc) + delta_bg_above_tolerance <- filter(df, delta_bg >= delta_bg_tolerance) + lapply(variables, create_and_publish_plot, df = delta_bg_above_tolerance, + plot_type = "scatter", out_dir_qc = out_dir_qc, suffix = "_above_tolerance") +} + +process_exp_dir <- function(exp_dir, exp_name, genes, easy_results_file) { + out_dir <- file.path(exp_dir, "zscores") + out_dir_qc <- file.path(exp_dir, "qc") + dir.create(out_dir, showWarnings = FALSE) + dir.create(out_dir_qc, showWarnings = FALSE) + df <- load_and_preprocess_data(easy_results_file, genes) + delta_bg_tolerance <- mean(df$delta_bg, na.rm = TRUE) + 3 * sd(df$delta_bg, na.rm = TRUE) + + generate_and_publish_qc(df, delta_bg_tolerance, out_dir_qc) + + variables <- c("L", "K", "r", "AUC", "delta_bg") + publish_summary_stats(df, variables, out_dir) + publish_interaction_scores(df, out_dir) + publish_zscores(df, out_dir) + + list( + zscores_file = file.path(out_dir, "zscores_interaction.csv"), + exp_name = exp_name + ) +} + +processed_experiments <- lapply(names(args$experiments), function(exp_name) { + process_exp_dir(args$experiments[[exp_name]], exp_name, genes, args$easy_results_file) +}) + +zscores_files <- sapply(processed_experiments, `[[`, "zscores_file") +exp_names <- sapply(processed_experiments, `[[`, "exp_name") + +# Sequentially join z-scores similar to original logic +combine_zscores <- function(zscores_files) { + if (length(zscores_files) < 2) stop("Not enough experiments to compare, exiting script") + + if (length(zscores_files) == 2) { + X1 <- read.csv(file = zscores_files[1], stringsAsFactors = FALSE) + X2 <- read.csv(file = zscores_files[2], stringsAsFactors = FALSE) + X <- plyr::join(X1, X2, by = "OrfRep", type = "full") + + } else if (length(zscores_files) == 3) { + X1 <- read.csv(file = zscores_files[1], stringsAsFactors = FALSE) + X2 <- read.csv(file = zscores_files[2], stringsAsFactors = FALSE) + X3 <- read.csv(file = zscores_files[3], stringsAsFactors = FALSE) + X <- plyr::join(X1, X2, by = "OrfRep", type = "full") + X <- plyr::join(X, X3, by = "OrfRep", type = "full") + + } else if (length(zscores_files) == 4) { + X1 <- read.csv(file = zscores_files[1], stringsAsFactors = FALSE) + X2 <- read.csv(file = zscores_files[2], stringsAsFactors = FALSE) + X3 <- read.csv(file = zscores_files[3], stringsAsFactors = FALSE) + X4 <- read.csv(file = zscores_files[4], stringsAsFactors = FALSE) + X <- plyr::join(X1, X2, by = "OrfRep", type = "full") + X <- plyr::join(X, X3, by = "OrfRep", type = "full") + X <- plyr::join(X, X4, by = "OrfRep", type = "full") + } + + X +} + +process_combined_zscores <- function(joined_data, sd, out_dir, exp_names) { + OBH <- joined_data[, order(colnames(joined_data))] # OrderByHeader + + headSel <- OBH %>% + select(contains('OrfRep'), matches('Gene'), contains('Z_lm_K'), contains('Z_Shift_K'), contains('Z_lm_L'), contains('Z_Shift_L')) %>% + select(-contains("Gene.")) + + headSel2 <- OBH %>% + select(contains('OrfRep'), matches('Gene')) %>% + select(-contains("Gene.")) + + # Fill NA values + fill_na_in_columns <- function(data) { + for (header in colnames(data)) { + if (nrow(data) == 0) { + warning(paste("Empty data frame for column:", header)) + } + if (grepl("Shift", header)) { + data[[header]][is.na(data[[header]])] <- 0.001 + } else if (grepl("Z_lm_", header)) { + data[[header]][is.na(data[[header]])] <- 0.0001 + } + } + data + } + + headSel <- fill_na_in_columns(headSel) + + # Filter by standard deviation + filter_by_sd <- function(data, sd) { + if (sd == 0) return(data) + + z_lm_cols <- select(data, contains("z_lm_")) + filter_vector <- rowSums(abs(z_lm_cols) >= sd) > 0 + data[filter_vector, ] + } + + REMcRdy <- filter_by_sd(select(headSel, contains("OrfRep"), matches("Gene"), contains("z_lm_")), sd) + shiftOnly <- filter_by_sd(select(headSel, contains("OrfRep"), matches("Gene"), contains("z_shift")), sd) + + # Reorder columns + reorder_columns <- function(data1, data2) { + combined_data <- data1 + for (i in 3:ncol(data1)) { + if (nrow(data1) == 0 | nrow(data2) == 0) { + next + } + combined_data <- cbind(combined_data, data2[i], data1[i]) + } + combined_data + } + + combI <- reorder_columns(headSel2, shiftOnly) + + write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE) + write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE) + + # Relabel headers + relabel_headers <- function(headers, exp_names) { + new_labels <- headers + for (i in seq_along(headers)) { + suffix <- sub("^.*\\.(\\d+)$", "\\1", headers[i]) + if (suffix %in% seq_along(exp_names)) { + exp_name <- exp_names[as.numeric(suffix)] + new_labels[i] <- gsub(paste0(".", suffix), paste0("_", exp_name), headers[i]) + } + } + new_labels + } + + colnames(shiftOnly) <- relabel_headers(colnames(shiftOnly), exp_names) + colnames(REMcRdy) <- relabel_headers(colnames(REMcRdy), exp_names) + + write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE) + write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE) +} + +joined_data <- combine_zscores(zscores_files) +process_combined_zscores(joined_data, args$sd, args$out_dir, exp_names) diff --git a/workflow/.old/apps/r/interactions.R b/workflow/.old/apps/r/interactions.R new file mode 100644 index 00000000..0434ff37 --- /dev/null +++ b/workflow/.old/apps/r/interactions.R @@ -0,0 +1,250 @@ +suppressMessages({ + library("ggplot2") + library("plotly") + library("htmlwidgets") + library("dplyr") + library("ggthemes") +}) + +# Constants for configuration +PLOT_WIDTH <- 14 +PLOT_HEIGHT <- 9 +BASE_SIZE <- 14 +# BACKUP_SUFFIX <- paste0(".backup_", Sys.Date()) + +options(warn = 2, max.print = 100) + +# Parse arguments +parse_arguments <- function() { + if (interactive()) { + args <- c( + "1", + "3", + "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/StudyInfo.csv", + "/home/bryan/documents/develop/scripts/hartmanlab/workflow/apps/r/SGD_features.tab", + "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/easy/20240116_jhartman2/results_std.txt", + "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/Exp1/zscores" + ) + } else { + args <- commandArgs(trailingOnly = TRUE) + } + list( + exp_number = as.numeric(args[1]), + delta_bg_factor = as.numeric(args[2]), + study_info_file = normalizePath(file.path(args[3]), mustWork = FALSE), + sgd_gene_list = normalizePath(file.path(args[4]), mustWork = FALSE), + input_file = normalizePath(file.path(args[5]), mustWork = FALSE), + out_dir = normalizePath(file.path(args[6]), mustWork = FALSE), + out_dir_qc = normalizePath(file.path(args[6], "qc"), mustWork = FALSE) + ) +} + +args <- parse_arguments() +dir.create(args$out_dir, showWarnings = FALSE) +dir.create(args$out_dir_qc, showWarnings = FALSE) + +# Define themes and scales +theme_publication <- function(base_size = BASE_SIZE, base_family = "sans") { + theme_foundation(base_size = base_size, base_family = base_family) + + theme( + plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5), + text = element_text(), + panel.background = element_rect(colour = NA), + plot.background = element_rect(colour = NA), + panel.border = element_rect(colour = NA), + axis.title = element_text(face = "bold", size = rel(1)), + axis.title.y = element_text(angle = 90, vjust = 2), + axis.title.x = element_text(vjust = -0.2), + axis.line = element_line(colour = "black"), + panel.grid.major = element_line(colour = "#f0f0f0"), + panel.grid.minor = element_blank(), + legend.key = element_rect(colour = NA), + legend.position = "bottom", + legend.direction = "horizontal", + plot.margin = unit(c(10, 5, 5, 5), "mm"), + strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"), + strip.text = element_text(face = "bold") + ) +} + +theme_publication_legend_right <- function(base_size = BASE_SIZE, base_family = "sans") { + theme_publication(base_size, base_family) + + theme( + legend.position = "right", + legend.direction = "vertical", + legend.key.size = unit(0.5, "cm"), + legend.spacing = unit(0, "cm"), + legend.title = element_text(face = "italic") + ) +} + +scale_fill_publication <- function(...) { + discrete_scale("fill", "Publication", manual_pal(values = c( + "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", + "#a6cee3", "#fb9a99", "#984ea3", "#ffff33" + )), ...) +} + +scale_colour_publication <- function(...) { + discrete_scale("colour", "Publication", manual_pal(values = c( + "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", + "#a6cee3", "#fb9a99", "#984ea3", "#ffff33" + )), ...) +} + + +# Load and preprocess data +load_and_preprocess_data <- function(input_file, genes) { + df <- tryCatch({ + read.delim(input_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE) + }, error = function(e) { + stop("Error reading file: ", input_file, "\n", e$message) + }) + + # Filter out rows where the first column is empty or "Scan" + df <- df[!df[[1]] %in% c("", "Scan"), ] + + # Convert specific columns to numeric + numeric_columns <- c("Col", "Row", "l", "K", "r", "Scan", "AUC96", "LstBackgrd", "X1stBackgrd") + df[numeric_columns[numeric_columns %in% colnames(df)]] <- lapply(df[numeric_columns[numeric_columns %in% colnames(df)]], as.numeric) + + # Handle missing critical columns with warnings + df$L <- if ("l" %in% colnames(df)) df$l else {warning("Missing column: l"); NA} + df$AUC <- if ("AUC96" %in% colnames(df)) df$AUC96 else {warning("Missing column: AUC96"); NA} + df$conc_num <- if ("Conc" %in% colnames(df)) as.numeric(gsub("[^0-9\\.]", "", df$Conc)) else NA + df$delta_bg <- if (all(c("X1stBackgrd", "LstBackgrd") %in% colnames(df))) df$LstBackgrd - df$X1stBackgrd else {warning("Missing columns for delta_bg calculation"); NA} + + # Replace ORF IDs with Gene Names using the SGD gene list + df$GeneName <- vapply(df$ORF, function(orf) { + gene_name <- genes %>% filter(ORF == orf) %>% pull(GeneName) + ifelse(is.null(gene_name) || gene_name == "" || length(gene_name) == 0, orf, gene_name) + }, character(1)) + + if (nrow(df) == 0) warning("Dataframe is empty after filtering") + + return(df) +} + + +df <- load_and_preprocess_data(args$input_file) + +delta_bg_tolerance <- mean(df$delta_bg, na.rm = TRUE) + args$delta_bg_factor * sd(df$delta_bg, na.rm = TRUE) +message("Exp", args$exp_number, ": The delta_bg_factor is: ", args$delta_bg_factor) +message("Exp", args$exp_number, ": The delta_bg_tolerance is ", delta_bg_tolerance) + +df <- df %>% + mutate(OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep)) %>% + filter(!Gene %in% c("BLANK", "Blank", "blank"), Drug != "BMH21") + +# Create plot +create_plot <- function(df, var, plot_type) { + filtered_df <- df %>% filter(is.finite(.data[[var]])) + p <- ggplot(filtered_df, aes(Scan, .data[[var]], color = as.factor(conc_num))) + + ggtitle(paste("Plate analysis by Drug Conc for", var, "before quality control")) + + theme_publication() + + if (plot_type == "scatter") { + p <- p + + geom_point(shape = 3, size = 0.2, position = "jitter") + + stat_summary(fun = mean, geom = "point", size = 0.6) + + stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), geom = "errorbar") + } else { + p <- p + geom_boxplot() + } + + return(p) +} + +# Publish plot to PDF and HTML (Plotly) +publish_plot <- function(plot, plot_path) { + # if (file.exists(plot_path)) { + # file.rename(plot_path, paste0(plot_path, BACKUP_SUFFIX)) + # } + + pdf(plot_path, width = PLOT_WIDTH, height = PLOT_HEIGHT) + print(plot) + dev.off() + + pgg <- suppressWarnings(ggplotly(plot, + tooltip = c("L", "K", "ORF", "Gene", "delta_bg")) %>% + layout(legend = list(orientation = "h"))) + saveWidget(pgg, sub(".pdf$", ".html", plot_path), selfcontained = TRUE) +} + +# Publish multiple plots +publish_multiple_plots <- function(df, variables, plot_type, out_dir_qc, suffix = "") { + lapply(variables, function(var) { + plot <- create_plot(df, var, plot_type) + publish_plot(plot, file.path(out_dir_qc, paste0("plate_analysis_", var, suffix, ".pdf"))) + }) +} + +# Calculate and publish summary statistics +publish_summary_stats <- function(df, variables, out_dir) { + stats_list <- lapply(variables, function(var) { + df %>% + group_by(OrfRep, conc_num) %>% + summarize( + N = n(), + mean_val = mean(.data[[var]], na.rm = TRUE), + sd_val = sd(.data[[var]], na.rm = TRUE), + se_val = sd_val / sqrt(N) + ) + }) + summary_stats_df <- bind_rows(stats_list, .id = "variable") + write.csv(summary_stats_df, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE) +} + +# Calculate and publish interaction scores +publish_interaction_scores <- function(df, out_dir) { + interaction_scores <- df %>% + group_by(OrfRep) %>% + summarize( + mean_L = mean(L, na.rm = TRUE), + mean_K = mean(K, na.rm = TRUE), + mean_r = mean(r, na.rm = TRUE), + mean_AUC = mean(AUC, na.rm = TRUE), + delta_bg_mean = mean(delta_bg, na.rm = TRUE), + delta_bg_sd = sd(delta_bg, na.rm = TRUE) + ) %>% + mutate( + l_rank = rank(mean_L), + k_rank = rank(mean_K), + r_rank = rank(mean_r), + auc_rank = rank(mean_AUC) + ) + + write.csv(interaction_scores, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE) + write.csv(interaction_scores %>% + arrange(l_rank, k_rank), file.path(out_dir, "rf_zscores_interaction_ranked.csv"), row.names = FALSE) +} + +# Publish z-scores +publish_zscores <- function(df, out_dir) { + zscores <- df %>% + mutate( + zscore_L = scale(L, center = TRUE, scale = TRUE), + zscore_K = scale(K, center = TRUE, scale = TRUE), + zscore_r = scale(r, center = TRUE, scale = TRUE), + zscore_AUC = scale(AUC, center = TRUE, scale = TRUE) + ) + + write.csv(zscores, file.path(out_dir, "zscores_interaction.csv"), row.names = FALSE) +} + +# QC plot generation and publication +generate_and_publish_qc <- function(df, delta_bg_tolerance, out_dir_qc) { + publish_multiple_plots(df, c("L", "K", "r", "AUC", "delta_bg"), "scatter", out_dir_qc) + + delta_bg_above_tolerance <- df[df$delta_bg >= delta_bg_tolerance, ] + publish_multiple_plots(delta_bg_above_tolerance, c("L", "K", "r", "AUC", "delta_bg"), "scatter", out_dir_qc, "_above_tolerance") +} + +# Run QC plot generation and publication +generate_and_publish_qc(df, delta_bg_tolerance, args$out_dir_qc) + +# Run summary statistics, interaction score, and z-score calculations and publications +variables <- c("L", "K", "r", "AUC", "delta_bg") +publish_summary_stats(df, variables, args$out_dir) +publish_interaction_scores(df, args$out_dir) +publish_zscores(df, args$out_dir)