From 1502f6bf382450006bfb13124f91a0bb7ddc4578 Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Fri, 23 Aug 2024 04:01:13 -0400 Subject: [PATCH] Implement calculate_interaction_zscores.R --- .../apps/r/calculate_interaction_zscores.R | 214 ++++++---------- workflow/apps/r/interactions.R | 238 ------------------ workflow/apps/r/joinInteractExps.R | 2 +- workflow/qhtcp-workflow | 27 +- 4 files changed, 101 insertions(+), 380 deletions(-) delete mode 100644 workflow/apps/r/interactions.R diff --git a/workflow/apps/r/calculate_interaction_zscores.R b/workflow/apps/r/calculate_interaction_zscores.R index 39d12f61..dbc5e9d2 100644 --- a/workflow/apps/r/calculate_interaction_zscores.R +++ b/workflow/apps/r/calculate_interaction_zscores.R @@ -1,10 +1,10 @@ suppressMessages({ - if (!require("ggplot2")) stop("Package ggplot2 is required but not installed.") - if (!require("plotly")) stop("Package plotly is required but not installed.") - if (!require("htmlwidgets")) stop("Package htmlwidgets is required but not installed.") - if (!require("dplyr")) stop("Package dplyr is required but not installed.") - if (!require("ggthemes")) stop("Package ggthemes is required but not installed.") - if (!require("plyr")) stop("Package plyr is required but not installed.") + library("ggplot2") + library("plotly") + library("htmlwidgets") + library("dplyr") + library("ggthemes") + library("data.table") }) # Constants for configuration @@ -14,9 +14,10 @@ BASE_SIZE <- 14 options(warn = 2, max.print = 100) +# Function to parse arguments parse_arguments <- function() { - args <- if (interactive()) { - c( + if (interactive()) { + args <- c( "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240116_jhartman2_DoxoHLD", 3, "/home/bryan/documents/develop/scripts/hartmanlab/workflow/apps/r/SGD_features.tab", @@ -24,14 +25,10 @@ parse_arguments <- function() { "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp1", "Experiment 1: Doxo versus HLD", "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp2", - "Experiment 2: HLD versus Doxo", - "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp3", - "Experiment 3: HLD versus WT", - "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp4", - "Experiment 4: Doxo versus WT" + "Experiment 2: HLD versus Doxo" ) } else { - commandArgs(trailingOnly = TRUE) + args <- commandArgs(trailingOnly = TRUE) } paths <- normalizePath(file.path(args[seq(5, length(args), by = 2)]), mustWork = FALSE) @@ -49,7 +46,9 @@ parse_arguments <- function() { args <- parse_arguments() -dir.create(args$out_dir, showWarnings = FALSE) +# Ensure main output directories exist +dir.create(file.path(args$out_dir, "zscores"), showWarnings = FALSE) +dir.create(file.path(args$out_dir, "zscores", "qc"), showWarnings = FALSE) # Define themes and scales theme_publication <- function(base_size = BASE_SIZE, base_family = "sans") { @@ -110,18 +109,26 @@ sgd_genes <- function(sgd_gene_list) { genes <- sgd_genes(args$sgd_gene_list) load_and_preprocess_data <- function(easy_results_file, genes) { + # Attempt to read the file and handle any errors that occur df <- tryCatch({ read.delim(easy_results_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE) }, error = function(e) { stop("Error reading file: ", easy_results_file, "\n", e$message) - }) %>% - filter(!.[[1]] %in% c("", "Scan")) # Fixed syntax - - numeric_columns <- c("Col", "Row", "l", "K", "r", "Scan", "AUC96", "LstBackgrd", "X1stBackgrd") - df[numeric_columns[numeric_columns %in% colnames(df)]] <- - lapply(df[numeric_columns[numeric_columns %in% colnames(df)]], as.numeric) - + }) + + # Filter out non-data rows df <- df %>% + filter(!.[[1]] %in% c("", "Scan")) + + # Convert specified columns to numeric + numeric_columns <- c("Col", "Row", "l", "K", "r", "Scan", "AUC96", "LstBackgrd", "X1stBackgrd") + df[numeric_columns[numeric_columns %in% colnames(df)]] <- + lapply(df[numeric_columns[numeric_columns %in% colnames(df)]], as.numeric) + + # Further filter and mutate the data + df <- df %>% + filter(!(Scan %in% c("", "Scan"))) %>% + {if ("Conc" %in% colnames(.)) filter(., Conc != "0ug/mL") else .} %>% mutate( L = if ("l" %in% colnames(.)) l else {warning("Missing column: l"); NA}, AUC = if ("AUC96" %in% colnames(.)) AUC96 else {warning("Missing column: AUC96"); NA}, @@ -131,17 +138,20 @@ load_and_preprocess_data <- function(easy_results_file, genes) { GeneName = vapply(ORF, function(orf) { gene_name <- genes %>% filter(ORF == orf) %>% pull(GeneName) ifelse(is.null(gene_name) || gene_name == "" || length(gene_name) == 0, orf, gene_name) - }, character(1)) # Ensures a character vector is returned + }, character(1)) ) - + + # Check if the dataframe is empty after filtering if (nrow(df) == 0) warning("Dataframe is empty after filtering") return(df) } + +# Plot creation and publication create_and_publish_plot <- function(df, var, plot_type, out_dir_qc, suffix = "") { plot_func <- if (plot_type == "scatter") geom_point else geom_boxplot - filtered_df <- filter(df, is.finite(.data[[var]])) + filtered_df <- df[is.finite(df[[var]]), ] p <- ggplot(filtered_df, aes(Scan, .data[[var]], color = as.factor(conc_num))) + plot_func(shape = 3, size = 0.2, position = "jitter") + @@ -161,27 +171,27 @@ create_and_publish_plot <- function(df, var, plot_type, out_dir_qc, suffix = "") saveWidget(pgg, html_path, selfcontained = TRUE) } - +# Summary statistics publication publish_summary_stats <- function(df, variables, out_dir) { stats_list <- lapply(variables, function(var) { df %>% dplyr::group_by(OrfRep, conc_num) %>% dplyr::summarize( - N = dplyr::n(), # Ensure that the correct version of n() is used + N = dplyr::n(), mean_val = mean(.data[[var]], na.rm = TRUE), sd_val = sd(.data[[var]], na.rm = TRUE), se_val = sd_val / sqrt(N) ) }) summary_stats_df <- dplyr::bind_rows(stats_list, .id = "variable") - write.csv(summary_stats_df, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE) + fwrite(summary_stats_df, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE) } - +# Interaction scores publication publish_interaction_scores <- function(df, out_dir) { interaction_scores <- df %>% - group_by(OrfRep) %>% - summarize( + dplyr::group_by(OrfRep) %>% + dplyr::summarize( mean_L = mean(L, na.rm = TRUE), mean_K = mean(K, na.rm = TRUE), mean_r = mean(r, na.rm = TRUE), @@ -189,147 +199,75 @@ publish_interaction_scores <- function(df, out_dir) { delta_bg_mean = mean(delta_bg, na.rm = TRUE), delta_bg_sd = sd(delta_bg, na.rm = TRUE) ) %>% - mutate( + dplyr::mutate( l_rank = rank(mean_L), k_rank = rank(mean_K), r_rank = rank(mean_r), auc_rank = rank(mean_AUC) ) - write.csv(interaction_scores, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE) - write.csv(arrange(interaction_scores, l_rank, k_rank), - file.path(out_dir, "rf_zscores_interaction_ranked.csv"), row.names = FALSE) + fwrite(interaction_scores, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE) + fwrite(dplyr::arrange(interaction_scores, l_rank, k_rank), + file.path(out_dir, "rf_zscores_interaction_ranked.csv"), row.names = FALSE) } +# Z-scores publication publish_zscores <- function(df, out_dir) { zscores <- df %>% - mutate( + dplyr::mutate( zscore_L = scale(L, center = TRUE, scale = TRUE), zscore_K = scale(K, center = TRUE, scale = TRUE), zscore_r = scale(r, center = TRUE, scale = TRUE), zscore_AUC = scale(AUC, center = TRUE, scale = TRUE) ) - write.csv(zscores, file.path(out_dir, "zscores_interaction.csv"), row.names = FALSE) + fwrite(zscores, file.path(out_dir, "zscores_interaction.csv"), row.names = FALSE) } +# QC generation and publication generate_and_publish_qc <- function(df, delta_bg_tolerance, out_dir_qc) { variables <- c("L", "K", "r", "AUC", "delta_bg") lapply(variables, create_and_publish_plot, df = df, plot_type = "scatter", out_dir_qc = out_dir_qc) - delta_bg_above_tolerance <- filter(df, delta_bg >= delta_bg_tolerance) + delta_bg_above_tolerance <- df[df$delta_bg >= delta_bg_tolerance, ] lapply(variables, create_and_publish_plot, df = delta_bg_above_tolerance, plot_type = "scatter", out_dir_qc = out_dir_qc, suffix = "_above_tolerance") } -process_exp_dir <- function(exp_dir, exp_name, genes, easy_results_file) { +# Process experiments +process_experiment <- function(exp_name, exp_dir, sgd_genes, output_dir) { out_dir <- file.path(exp_dir, "zscores") - out_dir_qc <- file.path(exp_dir, "qc") - dir.create(out_dir, showWarnings = FALSE) + out_dir_qc <- file.path(out_dir, "qc") + dir.create(out_dir, showWarnings = FALSE, recursive = TRUE) dir.create(out_dir_qc, showWarnings = FALSE) - df <- load_and_preprocess_data(easy_results_file, genes) - delta_bg_tolerance <- mean(df$delta_bg, na.rm = TRUE) + 3 * sd(df$delta_bg, na.rm = TRUE) - generate_and_publish_qc(df, delta_bg_tolerance, out_dir_qc) + data <- load_and_preprocess_data(args$easy_results_file, sgd_genes) + + delta_bg_tolerance <- mean(data$delta_bg, na.rm = TRUE) + 3 * sd(data$delta_bg, na.rm = TRUE) + + generate_and_publish_qc(data, delta_bg_tolerance, out_dir_qc) variables <- c("L", "K", "r", "AUC", "delta_bg") - publish_summary_stats(df, variables, out_dir) - publish_interaction_scores(df, out_dir) - publish_zscores(df, out_dir) + publish_summary_stats(data, variables, out_dir) + publish_interaction_scores(data, out_dir) + publish_zscores(data, out_dir) - list( - zscores_file = file.path(out_dir, "zscores_interaction.csv"), - exp_name = exp_name - ) + output_file <- file.path(out_dir, "zscores_interaction.csv") + fwrite(data, output_file, row.names = FALSE) + + return(output_file) } -processed_experiments <- lapply(names(args$experiments), function(exp_name) { - process_exp_dir(args$experiments[[exp_name]], exp_name, genes, args$easy_results_file) +# Process all experiments +processed_files <- lapply(names(args$experiments), function(exp_name) { + process_experiment(exp_name, args$experiments[[exp_name]], genes, args$out_dir) }) -zscores_files <- sapply(processed_experiments, `[[`, "zscores_file") -exp_names <- sapply(processed_experiments, `[[`, "exp_name") - -combine_zscores <- function(zscores_files) { - if (length(zscores_files) < 2) stop("Not enough experiments to compare, exiting script") +# Combine results from all experiments if multiple experiments exist +if (length(processed_files) > 1) { + combined_data <- Reduce(function(x, y) { + merge(fread(x), fread(y), by = "OrfRep", all = TRUE, allow.cartesian = TRUE) + }, processed_files) - joined_data <- read.csv(file = zscores_files[1], stringsAsFactors = FALSE) - for (file in zscores_files[-1]) { - temp_data <- read.csv(file = file, stringsAsFactors = FALSE) - joined_data <- plyr::join(joined_data, temp_data, by = "OrfRep", type = "full") - } - joined_data + combined_output_file <- file.path(args$out_dir, "zscores", "zscores_interaction_combined.csv") + fwrite(combined_data, combined_output_file, row.names = FALSE) } - -process_combined_zscores <- function(joined_data, sd, out_dir, exp_names) { - ordered_data <- joined_data %>% - select(contains("OrfRep"), matches("Gene"), - contains("z_lm_k"), contains("z_shift_k"), - contains("z_lm_l"), contains("z_shift_l")) %>% - arrange(contains("OrfRep")) - - clean_headers <- function(data, suffixes) { - suffixes_to_remove <- paste0("Gene.", seq_len(suffixes)) - select(data, -all_of(suffixes_to_remove)) - } - - headSel <- clean_headers(ordered_data, length(zscores_files) - 1) - headSel2 <- clean_headers(select(joined_data, contains("OrfRep"), matches("Gene")), length(zscores_files) - 1) - - fill_na_in_columns <- function(data) { - for (header in colnames(data)) { - if (grepl("Shift", header)) { - data[[header]][is.na(data[[header]])] <- 0.001 - } else if (grepl("Z_lm_", header)) { - data[[header]][is.na(data[[header]])] <- 0.0001 - } - } - data - } - - headSel <- fill_na_in_columns(headSel) - - filter_by_sd <- function(data, sd) { - if (sd == 0) return(data) - - z_lm_cols <- select(data, contains("z_lm_")) - filter_vector <- rowSums(abs(z_lm_cols) >= sd) > 0 - data[filter_vector, ] - } - - REMcRdy <- filter_by_sd(select(headSel, contains("OrfRep"), matches("Gene"), contains("z_lm_")), sd) - shiftOnly <- filter_by_sd(select(headSel, contains("OrfRep"), matches("Gene"), contains("z_shift")), sd) - - reorder_columns <- function(data1, data2) { - combined_data <- data1 - for (i in 3:ncol(data1)) { - combined_data <- cbind(combined_data, data2[i], data1[i]) - } - combined_data - } - - combI <- reorder_columns(headSel2, shiftOnly) - - write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE) - write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE) - - relabel_headers <- function(headers, exp_names) { - new_labels <- headers - for (i in seq_along(headers)) { - suffix <- sub("^.*\\.(\\d+)$", "\\1", headers[i]) - if (suffix %in% seq_along(exp_names)) { - exp_name <- exp_names[as.numeric(suffix)] - new_labels[i] <- gsub(paste0(".", suffix), paste0("_", exp_name), headers[i]) - } - } - new_labels - } - - colnames(shiftOnly) <- relabel_headers(colnames(shiftOnly), exp_names) - colnames(REMcRdy) <- relabel_headers(colnames(REMcRdy), exp_names) - - write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE) - write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE) -} - -joined_data <- combine_zscores(zscores_files) -process_combined_zscores(joined_data, args$sd, args$out_dir, exp_names) diff --git a/workflow/apps/r/interactions.R b/workflow/apps/r/interactions.R deleted file mode 100644 index 1c04f797..00000000 --- a/workflow/apps/r/interactions.R +++ /dev/null @@ -1,238 +0,0 @@ -suppressMessages({ - library("ggplot2") - library("plotly") - library("htmlwidgets") - library("dplyr") - library("ggthemes") -}) - -# Constants for configuration -PLOT_WIDTH <- 14 -PLOT_HEIGHT <- 9 -BASE_SIZE <- 14 -# BACKUP_SUFFIX <- paste0(".backup_", Sys.Date()) - -options(warn = 2, max.print = 100) - -# Parse arguments -parse_arguments <- function() { - if (interactive()) { - args <- c( - "1", - "3", - "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/StudyInfo.csv", - "/home/bryan/documents/develop/scripts/hartmanlab/workflow/apps/r/SGD_features.tab", - "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/easy/20240116_jhartman2/results_std.txt", - "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/Exp1/zscores" - ) - } else { - args <- commandArgs(trailingOnly = TRUE) - } - list( - exp_number = as.numeric(args[1]), - delta_bg_factor = as.numeric(args[2]), - study_info_file = normalizePath(file.path(args[3]), mustWork = FALSE), - sgd_gene_list = normalizePath(file.path(args[4]), mustWork = FALSE), - input_file = normalizePath(file.path(args[5]), mustWork = FALSE), - out_dir = normalizePath(file.path(args[6]), mustWork = FALSE), - out_dir_qc = normalizePath(file.path(args[6], "qc"), mustWork = FALSE) - ) -} - -args <- parse_arguments() -dir.create(args$out_dir, showWarnings = FALSE) -dir.create(args$out_dir_qc, showWarnings = FALSE) - -# Define themes and scales -theme_publication <- function(base_size = BASE_SIZE, base_family = "sans") { - theme_foundation(base_size = base_size, base_family = base_family) + - theme( - plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5), - text = element_text(), - panel.background = element_rect(colour = NA), - plot.background = element_rect(colour = NA), - panel.border = element_rect(colour = NA), - axis.title = element_text(face = "bold", size = rel(1)), - axis.title.y = element_text(angle = 90, vjust = 2), - axis.title.x = element_text(vjust = -0.2), - axis.line = element_line(colour = "black"), - panel.grid.major = element_line(colour = "#f0f0f0"), - panel.grid.minor = element_blank(), - legend.key = element_rect(colour = NA), - legend.position = "bottom", - legend.direction = "horizontal", - plot.margin = unit(c(10, 5, 5, 5), "mm"), - strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"), - strip.text = element_text(face = "bold") - ) -} - -theme_publication_legend_right <- function(base_size = BASE_SIZE, base_family = "sans") { - theme_publication(base_size, base_family) + - theme( - legend.position = "right", - legend.direction = "vertical", - legend.key.size = unit(0.5, "cm"), - legend.spacing = unit(0, "cm"), - legend.title = element_text(face = "italic") - ) -} - -scale_fill_publication <- function(...) { - discrete_scale("fill", "Publication", manual_pal(values = c( - "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", - "#a6cee3", "#fb9a99", "#984ea3", "#ffff33" - )), ...) -} - -scale_colour_publication <- function(...) { - discrete_scale("colour", "Publication", manual_pal(values = c( - "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", - "#a6cee3", "#fb9a99", "#984ea3", "#ffff33" - )), ...) -} - -# Load and preprocess data -load_and_preprocess_data <- function(input_file) { - df <- tryCatch({ - read.delim(input_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE) - }, error = function(e) { - stop("Error reading file: ", input_file, "\n", e$message) - }) - - df <- df[!df[[1]] %in% c("", "Scan"), ] - - numeric_columns <- c("Col", "Row", "l", "K", "r", "Scan", "AUC96", "LstBackgrd", "X1stBackgrd") - df[numeric_columns[numeric_columns %in% colnames(df)]] <- lapply(df[numeric_columns[numeric_columns %in% colnames(df)]], as.numeric) - - df$L <- if ("l" %in% colnames(df)) df$l else {warning("Missing column: l"); NA} - df$AUC <- if ("AUC96" %in% colnames(df)) df$AUC96 else {warning("Missing column: AUC96"); NA} - df$conc_num <- if ("Conc" %in% colnames(df)) as.numeric(gsub("[^0-9\\.]", "", df$Conc)) else NA - df$delta_bg <- if (all(c("X1stBackgrd", "LstBackgrd") %in% colnames(df))) df$LstBackgrd - df$X1stBackgrd else {warning("Missing columns for delta_bg calculation"); NA} - - if (nrow(df) == 0) warning("Dataframe is empty after filtering") - df -} - -df <- load_and_preprocess_data(args$input_file) - -delta_bg_tolerance <- mean(df$delta_bg, na.rm = TRUE) + args$delta_bg_factor * sd(df$delta_bg, na.rm = TRUE) -message("Exp", args$exp_number, ": The delta_bg_factor is: ", args$delta_bg_factor) -message("Exp", args$exp_number, ": The delta_bg_tolerance is ", delta_bg_tolerance) - -df <- df %>% - mutate(OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep)) %>% - filter(!Gene %in% c("BLANK", "Blank", "blank"), Drug != "BMH21") - -# Create plot -create_plot <- function(df, var, plot_type) { - filtered_df <- df %>% filter(is.finite(.data[[var]])) - p <- ggplot(filtered_df, aes(Scan, .data[[var]], color = as.factor(conc_num))) + - ggtitle(paste("Plate analysis by Drug Conc for", var, "before quality control")) + - theme_publication() - - if (plot_type == "scatter") { - p <- p + - geom_point(shape = 3, size = 0.2, position = "jitter") + - stat_summary(fun = mean, geom = "point", size = 0.6) + - stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), geom = "errorbar") - } else { - p <- p + geom_boxplot() - } - - return(p) -} - -# Publish plot to PDF and HTML (Plotly) -publish_plot <- function(plot, plot_path) { - # if (file.exists(plot_path)) { - # file.rename(plot_path, paste0(plot_path, BACKUP_SUFFIX)) - # } - - pdf(plot_path, width = PLOT_WIDTH, height = PLOT_HEIGHT) - print(plot) - dev.off() - - pgg <- suppressWarnings(ggplotly(plot, - tooltip = c("L", "K", "ORF", "Gene", "delta_bg")) %>% - layout(legend = list(orientation = "h"))) - saveWidget(pgg, sub(".pdf$", ".html", plot_path), selfcontained = TRUE) -} - -# Publish multiple plots -publish_multiple_plots <- function(df, variables, plot_type, out_dir_qc, suffix = "") { - lapply(variables, function(var) { - plot <- create_plot(df, var, plot_type) - publish_plot(plot, file.path(out_dir_qc, paste0("plate_analysis_", var, suffix, ".pdf"))) - }) -} - -# Calculate and publish summary statistics -publish_summary_stats <- function(df, variables, out_dir) { - stats_list <- lapply(variables, function(var) { - df %>% - group_by(OrfRep, conc_num) %>% - summarize( - N = n(), - mean_val = mean(.data[[var]], na.rm = TRUE), - sd_val = sd(.data[[var]], na.rm = TRUE), - se_val = sd_val / sqrt(N) - ) - }) - summary_stats_df <- bind_rows(stats_list, .id = "variable") - write.csv(summary_stats_df, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE) -} - -# Calculate and publish interaction scores -publish_interaction_scores <- function(df, out_dir) { - interaction_scores <- df %>% - group_by(OrfRep) %>% - summarize( - mean_L = mean(L, na.rm = TRUE), - mean_K = mean(K, na.rm = TRUE), - mean_r = mean(r, na.rm = TRUE), - mean_AUC = mean(AUC, na.rm = TRUE), - delta_bg_mean = mean(delta_bg, na.rm = TRUE), - delta_bg_sd = sd(delta_bg, na.rm = TRUE) - ) %>% - mutate( - l_rank = rank(mean_L), - k_rank = rank(mean_K), - r_rank = rank(mean_r), - auc_rank = rank(mean_AUC) - ) - - write.csv(interaction_scores, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE) - write.csv(interaction_scores %>% - arrange(l_rank, k_rank), file.path(out_dir, "rf_zscores_interaction_ranked.csv"), row.names = FALSE) -} - -# Publish z-scores -publish_zscores <- function(df, out_dir) { - zscores <- df %>% - mutate( - zscore_L = scale(L, center = TRUE, scale = TRUE), - zscore_K = scale(K, center = TRUE, scale = TRUE), - zscore_r = scale(r, center = TRUE, scale = TRUE), - zscore_AUC = scale(AUC, center = TRUE, scale = TRUE) - ) - - write.csv(zscores, file.path(out_dir, "zscores_interaction.csv"), row.names = FALSE) -} - -# QC plot generation and publication -generate_and_publish_qc <- function(df, delta_bg_tolerance, out_dir_qc) { - publish_multiple_plots(df, c("L", "K", "r", "AUC", "delta_bg"), "scatter", out_dir_qc) - - delta_bg_above_tolerance <- df[df$delta_bg >= delta_bg_tolerance, ] - publish_multiple_plots(delta_bg_above_tolerance, c("L", "K", "r", "AUC", "delta_bg"), "scatter", out_dir_qc, "_above_tolerance") -} - -# Run QC plot generation and publication -generate_and_publish_qc(df, delta_bg_tolerance, args$out_dir_qc) - -# Run summary statistics, interaction score, and z-score calculations and publications -variables <- c("L", "K", "r", "AUC", "delta_bg") -publish_summary_stats(df, variables, args$out_dir) -publish_interaction_scores(df, args$out_dir) -publish_zscores(df, args$out_dir) diff --git a/workflow/apps/r/joinInteractExps.R b/workflow/apps/r/joinInteractExps.R index 3fd986d4..fc85d2ca 100644 --- a/workflow/apps/r/joinInteractExps.R +++ b/workflow/apps/r/joinInteractExps.R @@ -7,7 +7,7 @@ parse_arguments <- function() { "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD", 2, "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/StudyInfo.csv", - "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/Exp1", + "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/Exp1", "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/Exp2" ) } else { diff --git a/workflow/qhtcp-workflow b/workflow/qhtcp-workflow index 71d820b6..5683d036 100755 --- a/workflow/qhtcp-workflow +++ b/workflow/qhtcp-workflow @@ -427,6 +427,25 @@ execute() { fi } +is_directory_empty() { + local dir="$1" + + # Check if the directory exists and is a directory + if [ ! -d "$dir" ]; then + echo "Directory does not exist or is not a directory" + return 1 + fi + + # Iterate over the files and directories inside the specified directory + for _ in "$dir"/*; do + # If we find at least one entry, it's not empty + return 1 + done + + # If the loop completes without finding any entries, the directory is empty + return 0 +} + # @description Backup one or more files to an incremented .bk file # # **TODO** @@ -1481,8 +1500,10 @@ calculate_interaction_zscores() { EOF declare script="$APPS_DIR/r/calculate_interaction_zscores.R" - declare combined_out_path="${1:-"$STUDY_RESULTS_DIR/zscores"}" - declare -a out_paths=("${EXP_PATHS[@]}/zscores" "$combined_out_path") + declare -a out_paths=("${1:-"$STUDY_RESULTS_DIR/zscores"}") + for path in "${EXP_PATHS[@]}"; do + out_paths+=("${path}/zscores") + done # TODO we'll need to change this behaviour after testing # we can test for existence and then choose to skip or rerun later @@ -1493,7 +1514,7 @@ calculate_interaction_zscores() { done execute "$RSCRIPT" "$script" \ - "${1:-"$out_path"}" \ + "${1:-"$STUDY_RESULTS_DIR"}" \ "${2:-3}" \ "${3:-"$APPS_DIR/r/SGD_features.tab"}" \ "${4:-"$EASY_RESULTS_DIR/results_std.txt"}" \