Implement calculate_interaction_zscores.R

This commit is contained in:
2024-08-23 04:01:13 -04:00
parent bcc8dba3e4
commit 1502f6bf38
4 changed files with 101 additions and 380 deletions

View File

@@ -1,10 +1,10 @@
suppressMessages({
if (!require("ggplot2")) stop("Package ggplot2 is required but not installed.")
if (!require("plotly")) stop("Package plotly is required but not installed.")
if (!require("htmlwidgets")) stop("Package htmlwidgets is required but not installed.")
if (!require("dplyr")) stop("Package dplyr is required but not installed.")
if (!require("ggthemes")) stop("Package ggthemes is required but not installed.")
if (!require("plyr")) stop("Package plyr is required but not installed.")
library("ggplot2")
library("plotly")
library("htmlwidgets")
library("dplyr")
library("ggthemes")
library("data.table")
})
# Constants for configuration
@@ -14,9 +14,10 @@ BASE_SIZE <- 14
options(warn = 2, max.print = 100)
# Function to parse arguments
parse_arguments <- function() {
args <- if (interactive()) {
c(
if (interactive()) {
args <- c(
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240116_jhartman2_DoxoHLD",
3,
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/apps/r/SGD_features.tab",
@@ -24,14 +25,10 @@ parse_arguments <- function() {
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp1",
"Experiment 1: Doxo versus HLD",
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp2",
"Experiment 2: HLD versus Doxo",
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp3",
"Experiment 3: HLD versus WT",
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp4",
"Experiment 4: Doxo versus WT"
"Experiment 2: HLD versus Doxo"
)
} else {
commandArgs(trailingOnly = TRUE)
args <- commandArgs(trailingOnly = TRUE)
}
paths <- normalizePath(file.path(args[seq(5, length(args), by = 2)]), mustWork = FALSE)
@@ -49,7 +46,9 @@ parse_arguments <- function() {
args <- parse_arguments()
dir.create(args$out_dir, showWarnings = FALSE)
# Ensure main output directories exist
dir.create(file.path(args$out_dir, "zscores"), showWarnings = FALSE)
dir.create(file.path(args$out_dir, "zscores", "qc"), showWarnings = FALSE)
# Define themes and scales
theme_publication <- function(base_size = BASE_SIZE, base_family = "sans") {
@@ -110,18 +109,26 @@ sgd_genes <- function(sgd_gene_list) {
genes <- sgd_genes(args$sgd_gene_list)
load_and_preprocess_data <- function(easy_results_file, genes) {
# Attempt to read the file and handle any errors that occur
df <- tryCatch({
read.delim(easy_results_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE)
}, error = function(e) {
stop("Error reading file: ", easy_results_file, "\n", e$message)
}) %>%
filter(!.[[1]] %in% c("", "Scan")) # Fixed syntax
})
# Filter out non-data rows
df <- df %>%
filter(!.[[1]] %in% c("", "Scan"))
# Convert specified columns to numeric
numeric_columns <- c("Col", "Row", "l", "K", "r", "Scan", "AUC96", "LstBackgrd", "X1stBackgrd")
df[numeric_columns[numeric_columns %in% colnames(df)]] <-
lapply(df[numeric_columns[numeric_columns %in% colnames(df)]], as.numeric)
# Further filter and mutate the data
df <- df %>%
filter(!(Scan %in% c("", "Scan"))) %>%
{if ("Conc" %in% colnames(.)) filter(., Conc != "0ug/mL") else .} %>%
mutate(
L = if ("l" %in% colnames(.)) l else {warning("Missing column: l"); NA},
AUC = if ("AUC96" %in% colnames(.)) AUC96 else {warning("Missing column: AUC96"); NA},
@@ -131,17 +138,20 @@ load_and_preprocess_data <- function(easy_results_file, genes) {
GeneName = vapply(ORF, function(orf) {
gene_name <- genes %>% filter(ORF == orf) %>% pull(GeneName)
ifelse(is.null(gene_name) || gene_name == "" || length(gene_name) == 0, orf, gene_name)
}, character(1)) # Ensures a character vector is returned
}, character(1))
)
# Check if the dataframe is empty after filtering
if (nrow(df) == 0) warning("Dataframe is empty after filtering")
return(df)
}
# Plot creation and publication
create_and_publish_plot <- function(df, var, plot_type, out_dir_qc, suffix = "") {
plot_func <- if (plot_type == "scatter") geom_point else geom_boxplot
filtered_df <- filter(df, is.finite(.data[[var]]))
filtered_df <- df[is.finite(df[[var]]), ]
p <- ggplot(filtered_df, aes(Scan, .data[[var]], color = as.factor(conc_num))) +
plot_func(shape = 3, size = 0.2, position = "jitter") +
@@ -161,27 +171,27 @@ create_and_publish_plot <- function(df, var, plot_type, out_dir_qc, suffix = "")
saveWidget(pgg, html_path, selfcontained = TRUE)
}
# Summary statistics publication
publish_summary_stats <- function(df, variables, out_dir) {
stats_list <- lapply(variables, function(var) {
df %>%
dplyr::group_by(OrfRep, conc_num) %>%
dplyr::summarize(
N = dplyr::n(), # Ensure that the correct version of n() is used
N = dplyr::n(),
mean_val = mean(.data[[var]], na.rm = TRUE),
sd_val = sd(.data[[var]], na.rm = TRUE),
se_val = sd_val / sqrt(N)
)
})
summary_stats_df <- dplyr::bind_rows(stats_list, .id = "variable")
write.csv(summary_stats_df, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
fwrite(summary_stats_df, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
}
# Interaction scores publication
publish_interaction_scores <- function(df, out_dir) {
interaction_scores <- df %>%
group_by(OrfRep) %>%
summarize(
dplyr::group_by(OrfRep) %>%
dplyr::summarize(
mean_L = mean(L, na.rm = TRUE),
mean_K = mean(K, na.rm = TRUE),
mean_r = mean(r, na.rm = TRUE),
@@ -189,147 +199,75 @@ publish_interaction_scores <- function(df, out_dir) {
delta_bg_mean = mean(delta_bg, na.rm = TRUE),
delta_bg_sd = sd(delta_bg, na.rm = TRUE)
) %>%
mutate(
dplyr::mutate(
l_rank = rank(mean_L),
k_rank = rank(mean_K),
r_rank = rank(mean_r),
auc_rank = rank(mean_AUC)
)
write.csv(interaction_scores, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE)
write.csv(arrange(interaction_scores, l_rank, k_rank),
fwrite(interaction_scores, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE)
fwrite(dplyr::arrange(interaction_scores, l_rank, k_rank),
file.path(out_dir, "rf_zscores_interaction_ranked.csv"), row.names = FALSE)
}
# Z-scores publication
publish_zscores <- function(df, out_dir) {
zscores <- df %>%
mutate(
dplyr::mutate(
zscore_L = scale(L, center = TRUE, scale = TRUE),
zscore_K = scale(K, center = TRUE, scale = TRUE),
zscore_r = scale(r, center = TRUE, scale = TRUE),
zscore_AUC = scale(AUC, center = TRUE, scale = TRUE)
)
write.csv(zscores, file.path(out_dir, "zscores_interaction.csv"), row.names = FALSE)
fwrite(zscores, file.path(out_dir, "zscores_interaction.csv"), row.names = FALSE)
}
# QC generation and publication
generate_and_publish_qc <- function(df, delta_bg_tolerance, out_dir_qc) {
variables <- c("L", "K", "r", "AUC", "delta_bg")
lapply(variables, create_and_publish_plot, df = df, plot_type = "scatter", out_dir_qc = out_dir_qc)
delta_bg_above_tolerance <- filter(df, delta_bg >= delta_bg_tolerance)
delta_bg_above_tolerance <- df[df$delta_bg >= delta_bg_tolerance, ]
lapply(variables, create_and_publish_plot, df = delta_bg_above_tolerance,
plot_type = "scatter", out_dir_qc = out_dir_qc, suffix = "_above_tolerance")
}
process_exp_dir <- function(exp_dir, exp_name, genes, easy_results_file) {
# Process experiments
process_experiment <- function(exp_name, exp_dir, sgd_genes, output_dir) {
out_dir <- file.path(exp_dir, "zscores")
out_dir_qc <- file.path(exp_dir, "qc")
dir.create(out_dir, showWarnings = FALSE)
out_dir_qc <- file.path(out_dir, "qc")
dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)
dir.create(out_dir_qc, showWarnings = FALSE)
df <- load_and_preprocess_data(easy_results_file, genes)
delta_bg_tolerance <- mean(df$delta_bg, na.rm = TRUE) + 3 * sd(df$delta_bg, na.rm = TRUE)
generate_and_publish_qc(df, delta_bg_tolerance, out_dir_qc)
data <- load_and_preprocess_data(args$easy_results_file, sgd_genes)
delta_bg_tolerance <- mean(data$delta_bg, na.rm = TRUE) + 3 * sd(data$delta_bg, na.rm = TRUE)
generate_and_publish_qc(data, delta_bg_tolerance, out_dir_qc)
variables <- c("L", "K", "r", "AUC", "delta_bg")
publish_summary_stats(df, variables, out_dir)
publish_interaction_scores(df, out_dir)
publish_zscores(df, out_dir)
publish_summary_stats(data, variables, out_dir)
publish_interaction_scores(data, out_dir)
publish_zscores(data, out_dir)
list(
zscores_file = file.path(out_dir, "zscores_interaction.csv"),
exp_name = exp_name
)
output_file <- file.path(out_dir, "zscores_interaction.csv")
fwrite(data, output_file, row.names = FALSE)
return(output_file)
}
processed_experiments <- lapply(names(args$experiments), function(exp_name) {
process_exp_dir(args$experiments[[exp_name]], exp_name, genes, args$easy_results_file)
# Process all experiments
processed_files <- lapply(names(args$experiments), function(exp_name) {
process_experiment(exp_name, args$experiments[[exp_name]], genes, args$out_dir)
})
zscores_files <- sapply(processed_experiments, `[[`, "zscores_file")
exp_names <- sapply(processed_experiments, `[[`, "exp_name")
# Combine results from all experiments if multiple experiments exist
if (length(processed_files) > 1) {
combined_data <- Reduce(function(x, y) {
merge(fread(x), fread(y), by = "OrfRep", all = TRUE, allow.cartesian = TRUE)
}, processed_files)
combine_zscores <- function(zscores_files) {
if (length(zscores_files) < 2) stop("Not enough experiments to compare, exiting script")
joined_data <- read.csv(file = zscores_files[1], stringsAsFactors = FALSE)
for (file in zscores_files[-1]) {
temp_data <- read.csv(file = file, stringsAsFactors = FALSE)
joined_data <- plyr::join(joined_data, temp_data, by = "OrfRep", type = "full")
combined_output_file <- file.path(args$out_dir, "zscores", "zscores_interaction_combined.csv")
fwrite(combined_data, combined_output_file, row.names = FALSE)
}
joined_data
}
process_combined_zscores <- function(joined_data, sd, out_dir, exp_names) {
ordered_data <- joined_data %>%
select(contains("OrfRep"), matches("Gene"),
contains("z_lm_k"), contains("z_shift_k"),
contains("z_lm_l"), contains("z_shift_l")) %>%
arrange(contains("OrfRep"))
clean_headers <- function(data, suffixes) {
suffixes_to_remove <- paste0("Gene.", seq_len(suffixes))
select(data, -all_of(suffixes_to_remove))
}
headSel <- clean_headers(ordered_data, length(zscores_files) - 1)
headSel2 <- clean_headers(select(joined_data, contains("OrfRep"), matches("Gene")), length(zscores_files) - 1)
fill_na_in_columns <- function(data) {
for (header in colnames(data)) {
if (grepl("Shift", header)) {
data[[header]][is.na(data[[header]])] <- 0.001
} else if (grepl("Z_lm_", header)) {
data[[header]][is.na(data[[header]])] <- 0.0001
}
}
data
}
headSel <- fill_na_in_columns(headSel)
filter_by_sd <- function(data, sd) {
if (sd == 0) return(data)
z_lm_cols <- select(data, contains("z_lm_"))
filter_vector <- rowSums(abs(z_lm_cols) >= sd) > 0
data[filter_vector, ]
}
REMcRdy <- filter_by_sd(select(headSel, contains("OrfRep"), matches("Gene"), contains("z_lm_")), sd)
shiftOnly <- filter_by_sd(select(headSel, contains("OrfRep"), matches("Gene"), contains("z_shift")), sd)
reorder_columns <- function(data1, data2) {
combined_data <- data1
for (i in 3:ncol(data1)) {
combined_data <- cbind(combined_data, data2[i], data1[i])
}
combined_data
}
combI <- reorder_columns(headSel2, shiftOnly)
write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
relabel_headers <- function(headers, exp_names) {
new_labels <- headers
for (i in seq_along(headers)) {
suffix <- sub("^.*\\.(\\d+)$", "\\1", headers[i])
if (suffix %in% seq_along(exp_names)) {
exp_name <- exp_names[as.numeric(suffix)]
new_labels[i] <- gsub(paste0(".", suffix), paste0("_", exp_name), headers[i])
}
}
new_labels
}
colnames(shiftOnly) <- relabel_headers(colnames(shiftOnly), exp_names)
colnames(REMcRdy) <- relabel_headers(colnames(REMcRdy), exp_names)
write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
}
joined_data <- combine_zscores(zscores_files)
process_combined_zscores(joined_data, args$sd, args$out_dir, exp_names)

View File

@@ -1,238 +0,0 @@
suppressMessages({
library("ggplot2")
library("plotly")
library("htmlwidgets")
library("dplyr")
library("ggthemes")
})
# Constants for configuration
PLOT_WIDTH <- 14
PLOT_HEIGHT <- 9
BASE_SIZE <- 14
# BACKUP_SUFFIX <- paste0(".backup_", Sys.Date())
options(warn = 2, max.print = 100)
# Parse arguments
parse_arguments <- function() {
if (interactive()) {
args <- c(
"1",
"3",
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/StudyInfo.csv",
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/apps/r/SGD_features.tab",
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/easy/20240116_jhartman2/results_std.txt",
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/Exp1/zscores"
)
} else {
args <- commandArgs(trailingOnly = TRUE)
}
list(
exp_number = as.numeric(args[1]),
delta_bg_factor = as.numeric(args[2]),
study_info_file = normalizePath(file.path(args[3]), mustWork = FALSE),
sgd_gene_list = normalizePath(file.path(args[4]), mustWork = FALSE),
input_file = normalizePath(file.path(args[5]), mustWork = FALSE),
out_dir = normalizePath(file.path(args[6]), mustWork = FALSE),
out_dir_qc = normalizePath(file.path(args[6], "qc"), mustWork = FALSE)
)
}
args <- parse_arguments()
dir.create(args$out_dir, showWarnings = FALSE)
dir.create(args$out_dir_qc, showWarnings = FALSE)
# Define themes and scales
theme_publication <- function(base_size = BASE_SIZE, base_family = "sans") {
theme_foundation(base_size = base_size, base_family = base_family) +
theme(
plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
text = element_text(),
panel.background = element_rect(colour = NA),
plot.background = element_rect(colour = NA),
panel.border = element_rect(colour = NA),
axis.title = element_text(face = "bold", size = rel(1)),
axis.title.y = element_text(angle = 90, vjust = 2),
axis.title.x = element_text(vjust = -0.2),
axis.line = element_line(colour = "black"),
panel.grid.major = element_line(colour = "#f0f0f0"),
panel.grid.minor = element_blank(),
legend.key = element_rect(colour = NA),
legend.position = "bottom",
legend.direction = "horizontal",
plot.margin = unit(c(10, 5, 5, 5), "mm"),
strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
strip.text = element_text(face = "bold")
)
}
theme_publication_legend_right <- function(base_size = BASE_SIZE, base_family = "sans") {
theme_publication(base_size, base_family) +
theme(
legend.position = "right",
legend.direction = "vertical",
legend.key.size = unit(0.5, "cm"),
legend.spacing = unit(0, "cm"),
legend.title = element_text(face = "italic")
)
}
scale_fill_publication <- function(...) {
discrete_scale("fill", "Publication", manual_pal(values = c(
"#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
"#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
)), ...)
}
scale_colour_publication <- function(...) {
discrete_scale("colour", "Publication", manual_pal(values = c(
"#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
"#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
)), ...)
}
# Load and preprocess data
load_and_preprocess_data <- function(input_file) {
df <- tryCatch({
read.delim(input_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE)
}, error = function(e) {
stop("Error reading file: ", input_file, "\n", e$message)
})
df <- df[!df[[1]] %in% c("", "Scan"), ]
numeric_columns <- c("Col", "Row", "l", "K", "r", "Scan", "AUC96", "LstBackgrd", "X1stBackgrd")
df[numeric_columns[numeric_columns %in% colnames(df)]] <- lapply(df[numeric_columns[numeric_columns %in% colnames(df)]], as.numeric)
df$L <- if ("l" %in% colnames(df)) df$l else {warning("Missing column: l"); NA}
df$AUC <- if ("AUC96" %in% colnames(df)) df$AUC96 else {warning("Missing column: AUC96"); NA}
df$conc_num <- if ("Conc" %in% colnames(df)) as.numeric(gsub("[^0-9\\.]", "", df$Conc)) else NA
df$delta_bg <- if (all(c("X1stBackgrd", "LstBackgrd") %in% colnames(df))) df$LstBackgrd - df$X1stBackgrd else {warning("Missing columns for delta_bg calculation"); NA}
if (nrow(df) == 0) warning("Dataframe is empty after filtering")
df
}
df <- load_and_preprocess_data(args$input_file)
delta_bg_tolerance <- mean(df$delta_bg, na.rm = TRUE) + args$delta_bg_factor * sd(df$delta_bg, na.rm = TRUE)
message("Exp", args$exp_number, ": The delta_bg_factor is: ", args$delta_bg_factor)
message("Exp", args$exp_number, ": The delta_bg_tolerance is ", delta_bg_tolerance)
df <- df %>%
mutate(OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep)) %>%
filter(!Gene %in% c("BLANK", "Blank", "blank"), Drug != "BMH21")
# Create plot
create_plot <- function(df, var, plot_type) {
filtered_df <- df %>% filter(is.finite(.data[[var]]))
p <- ggplot(filtered_df, aes(Scan, .data[[var]], color = as.factor(conc_num))) +
ggtitle(paste("Plate analysis by Drug Conc for", var, "before quality control")) +
theme_publication()
if (plot_type == "scatter") {
p <- p +
geom_point(shape = 3, size = 0.2, position = "jitter") +
stat_summary(fun = mean, geom = "point", size = 0.6) +
stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), geom = "errorbar")
} else {
p <- p + geom_boxplot()
}
return(p)
}
# Publish plot to PDF and HTML (Plotly)
publish_plot <- function(plot, plot_path) {
# if (file.exists(plot_path)) {
# file.rename(plot_path, paste0(plot_path, BACKUP_SUFFIX))
# }
pdf(plot_path, width = PLOT_WIDTH, height = PLOT_HEIGHT)
print(plot)
dev.off()
pgg <- suppressWarnings(ggplotly(plot,
tooltip = c("L", "K", "ORF", "Gene", "delta_bg")) %>%
layout(legend = list(orientation = "h")))
saveWidget(pgg, sub(".pdf$", ".html", plot_path), selfcontained = TRUE)
}
# Publish multiple plots
publish_multiple_plots <- function(df, variables, plot_type, out_dir_qc, suffix = "") {
lapply(variables, function(var) {
plot <- create_plot(df, var, plot_type)
publish_plot(plot, file.path(out_dir_qc, paste0("plate_analysis_", var, suffix, ".pdf")))
})
}
# Calculate and publish summary statistics
publish_summary_stats <- function(df, variables, out_dir) {
stats_list <- lapply(variables, function(var) {
df %>%
group_by(OrfRep, conc_num) %>%
summarize(
N = n(),
mean_val = mean(.data[[var]], na.rm = TRUE),
sd_val = sd(.data[[var]], na.rm = TRUE),
se_val = sd_val / sqrt(N)
)
})
summary_stats_df <- bind_rows(stats_list, .id = "variable")
write.csv(summary_stats_df, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
}
# Calculate and publish interaction scores
publish_interaction_scores <- function(df, out_dir) {
interaction_scores <- df %>%
group_by(OrfRep) %>%
summarize(
mean_L = mean(L, na.rm = TRUE),
mean_K = mean(K, na.rm = TRUE),
mean_r = mean(r, na.rm = TRUE),
mean_AUC = mean(AUC, na.rm = TRUE),
delta_bg_mean = mean(delta_bg, na.rm = TRUE),
delta_bg_sd = sd(delta_bg, na.rm = TRUE)
) %>%
mutate(
l_rank = rank(mean_L),
k_rank = rank(mean_K),
r_rank = rank(mean_r),
auc_rank = rank(mean_AUC)
)
write.csv(interaction_scores, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE)
write.csv(interaction_scores %>%
arrange(l_rank, k_rank), file.path(out_dir, "rf_zscores_interaction_ranked.csv"), row.names = FALSE)
}
# Publish z-scores
publish_zscores <- function(df, out_dir) {
zscores <- df %>%
mutate(
zscore_L = scale(L, center = TRUE, scale = TRUE),
zscore_K = scale(K, center = TRUE, scale = TRUE),
zscore_r = scale(r, center = TRUE, scale = TRUE),
zscore_AUC = scale(AUC, center = TRUE, scale = TRUE)
)
write.csv(zscores, file.path(out_dir, "zscores_interaction.csv"), row.names = FALSE)
}
# QC plot generation and publication
generate_and_publish_qc <- function(df, delta_bg_tolerance, out_dir_qc) {
publish_multiple_plots(df, c("L", "K", "r", "AUC", "delta_bg"), "scatter", out_dir_qc)
delta_bg_above_tolerance <- df[df$delta_bg >= delta_bg_tolerance, ]
publish_multiple_plots(delta_bg_above_tolerance, c("L", "K", "r", "AUC", "delta_bg"), "scatter", out_dir_qc, "_above_tolerance")
}
# Run QC plot generation and publication
generate_and_publish_qc(df, delta_bg_tolerance, args$out_dir_qc)
# Run summary statistics, interaction score, and z-score calculations and publications
variables <- c("L", "K", "r", "AUC", "delta_bg")
publish_summary_stats(df, variables, args$out_dir)
publish_interaction_scores(df, args$out_dir)
publish_zscores(df, args$out_dir)

View File

@@ -427,6 +427,25 @@ execute() {
fi
}
is_directory_empty() {
local dir="$1"
# Check if the directory exists and is a directory
if [ ! -d "$dir" ]; then
echo "Directory does not exist or is not a directory"
return 1
fi
# Iterate over the files and directories inside the specified directory
for _ in "$dir"/*; do
# If we find at least one entry, it's not empty
return 1
done
# If the loop completes without finding any entries, the directory is empty
return 0
}
# @description Backup one or more files to an incremented .bk file
#
# **TODO**
@@ -1481,8 +1500,10 @@ calculate_interaction_zscores() {
EOF
declare script="$APPS_DIR/r/calculate_interaction_zscores.R"
declare combined_out_path="${1:-"$STUDY_RESULTS_DIR/zscores"}"
declare -a out_paths=("${EXP_PATHS[@]}/zscores" "$combined_out_path")
declare -a out_paths=("${1:-"$STUDY_RESULTS_DIR/zscores"}")
for path in "${EXP_PATHS[@]}"; do
out_paths+=("${path}/zscores")
done
# TODO we'll need to change this behaviour after testing
# we can test for existence and then choose to skip or rerun later
@@ -1493,7 +1514,7 @@ calculate_interaction_zscores() {
done
execute "$RSCRIPT" "$script" \
"${1:-"$out_path"}" \
"${1:-"$STUDY_RESULTS_DIR"}" \
"${2:-3}" \
"${3:-"$APPS_DIR/r/SGD_features.tab"}" \
"${4:-"$EASY_RESULTS_DIR/results_std.txt"}" \