Add untracked files

This commit is contained in:
2024-08-23 04:01:30 -04:00
parent 1502f6bf38
commit 3098a9dda5
2 changed files with 603 additions and 0 deletions

View File

@@ -0,0 +1,353 @@
suppressMessages({
if (!require("ggplot2")) stop("Package ggplot2 is required but not installed.")
if (!require("plotly")) stop("Package plotly is required but not installed.")
if (!require("htmlwidgets")) stop("Package htmlwidgets is required but not installed.")
if (!require("dplyr")) stop("Package dplyr is required but not installed.")
if (!require("ggthemes")) stop("Package ggthemes is required but not installed.")
if (!require("plyr")) stop("Package plyr is required but not installed.")
})
# Constants for configuration
PLOT_WIDTH <- 14
PLOT_HEIGHT <- 9
BASE_SIZE <- 14
options(warn = 2, max.print = 100)
parse_arguments <- function() {
args <- if (interactive()) {
c(
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240116_jhartman2_DoxoHLD",
3,
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/apps/r/SGD_features.tab",
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/easy/20240116_jhartman2_DoxoHLD/results_std.txt",
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp1",
"Experiment 1: Doxo versus HLD",
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp2",
"Experiment 2: HLD versus Doxo"
)
} else {
commandArgs(trailingOnly = TRUE)
}
paths <- normalizePath(file.path(args[seq(5, length(args), by = 2)]), mustWork = FALSE)
names <- args[seq(6, length(args), by = 2)]
experiments <- setNames(paths, names)
list(
out_dir = normalizePath(file.path(args[1]), mustWork = FALSE),
sd = as.numeric(args[2]),
sgd_gene_list = normalizePath(file.path(args[3]), mustWork = FALSE),
easy_results_file = normalizePath(file.path(args[4]), mustWork = FALSE),
experiments = experiments
)
}
args <- parse_arguments()
dir.create(args$out_dir, showWarnings = FALSE)
# Define themes and scales
theme_publication <- function(base_size = BASE_SIZE, base_family = "sans") {
theme_foundation(base_size = base_size, base_family = base_family) +
theme(
plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
text = element_text(),
panel.background = element_rect(colour = NA),
plot.background = element_rect(colour = NA),
panel.border = element_rect(colour = NA),
axis.title = element_text(face = "bold", size = rel(1)),
axis.title.y = element_text(angle = 90, vjust = 2),
axis.title.x = element_text(vjust = -0.2),
axis.line = element_line(colour = "black"),
panel.grid.major = element_line(colour = "#f0f0f0"),
panel.grid.minor = element_blank(),
legend.key = element_rect(colour = NA),
legend.position = "bottom",
legend.direction = "horizontal",
plot.margin = unit(c(10, 5, 5, 5), "mm"),
strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
strip.text = element_text(face = "bold")
)
}
theme_publication_legend_right <- function(base_size = BASE_SIZE, base_family = "sans") {
theme_publication(base_size, base_family) +
theme(
legend.position = "right",
legend.direction = "vertical",
legend.key.size = unit(0.5, "cm"),
legend.spacing = unit(0, "cm"),
legend.title = element_text(face = "italic")
)
}
scale_fill_publication <- function(...) {
discrete_scale("fill", "Publication", manual_pal(values = c(
"#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
"#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
)), ...)
}
scale_colour_publication <- function(...) {
discrete_scale("colour", "Publication", manual_pal(values = c(
"#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
"#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
)), ...)
}
# Load SGD gene list
sgd_genes <- function(sgd_gene_list) {
read.delim(file = sgd_gene_list, quote = "", header = FALSE,
colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11))) %>%
dplyr::rename(ORF = V4, GeneName = V5)
}
genes <- sgd_genes(args$sgd_gene_list)
load_and_preprocess_data <- function(easy_results_file, genes) {
df <- tryCatch({
read.delim(easy_results_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE)
}, error = function(e) {
stop("Error reading file: ", easy_results_file, "\n", e$message)
}) %>%
filter(!.[[1]] %in% c("", "Scan"))
numeric_columns <- c("Col", "Row", "l", "K", "r", "Scan", "AUC96", "LstBackgrd", "X1stBackgrd")
df[numeric_columns[numeric_columns %in% colnames(df)]] <-
lapply(df[numeric_columns[numeric_columns %in% colnames(df)]], as.numeric)
df <- df %>%
mutate(
L = if ("l" %in% colnames(.)) l else {warning("Missing column: l"); NA},
AUC = if ("AUC96" %in% colnames(.)) AUC96 else {warning("Missing column: AUC96"); NA},
conc_num = if ("Conc" %in% colnames(.)) as.numeric(gsub("[^0-9\\.]", "", Conc)) else NA,
delta_bg = if (all(c("X1stBackgrd", "LstBackgrd") %in% colnames(.)))
LstBackgrd - X1stBackgrd else {warning("Missing columns for delta_bg calculation"); NA},
GeneName = vapply(ORF, function(orf) {
gene_name <- genes %>% filter(ORF == orf) %>% pull(GeneName)
ifelse(is.null(gene_name) || gene_name == "" || length(gene_name) == 0, orf, gene_name)
}, character(1))
)
if (nrow(df) == 0) warning("Dataframe is empty after filtering")
return(df)
}
create_and_publish_plot <- function(df, var, plot_type, out_dir_qc, suffix = "") {
plot_func <- if (plot_type == "scatter") geom_point else geom_boxplot
filtered_df <- filter(df, is.finite(.data[[var]]))
p <- ggplot(filtered_df, aes(Scan, .data[[var]], color = as.factor(conc_num))) +
plot_func(shape = 3, size = 0.2, position = "jitter") +
stat_summary(fun = mean, geom = "point", size = 0.6) +
stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), geom = "errorbar") +
ggtitle(paste("Plate analysis by Drug Conc for", var, "before quality control")) +
theme_publication()
pdf_path <- file.path(out_dir_qc, paste0("plate_analysis_", var, suffix, ".pdf"))
pdf(pdf_path, width = PLOT_WIDTH, height = PLOT_HEIGHT)
print(p)
dev.off()
html_path <- sub(".pdf$", ".html", pdf_path)
pgg <- suppressWarnings(ggplotly(p, tooltip = c("L", "K", "ORF", "Gene", "delta_bg", "GeneName")) %>%
layout(legend = list(orientation = "h")))
saveWidget(pgg, html_path, selfcontained = TRUE)
}
publish_summary_stats <- function(df, variables, out_dir) {
stats_list <- lapply(variables, function(var) {
df %>%
dplyr::group_by(OrfRep, conc_num) %>%
dplyr::summarize(
N = dplyr::n(),
mean_val = mean(.data[[var]], na.rm = TRUE),
sd_val = sd(.data[[var]], na.rm = TRUE),
se_val = sd_val / sqrt(N)
)
})
summary_stats_df <- dplyr::bind_rows(stats_list, .id = "variable")
write.csv(summary_stats_df, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
}
publish_interaction_scores <- function(df, out_dir) {
interaction_scores <- df %>%
group_by(OrfRep) %>%
summarize(
mean_L = mean(L, na.rm = TRUE),
mean_K = mean(K, na.rm = TRUE),
mean_r = mean(r, na.rm = TRUE),
mean_AUC = mean(AUC, na.rm = TRUE),
delta_bg_mean = mean(delta_bg, na.rm = TRUE),
delta_bg_sd = sd(delta_bg, na.rm = TRUE)
) %>%
mutate(
l_rank = rank(mean_L),
k_rank = rank(mean_K),
r_rank = rank(mean_r),
auc_rank = rank(mean_AUC)
)
write.csv(interaction_scores, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE)
write.csv(arrange(interaction_scores, l_rank, k_rank),
file.path(out_dir, "rf_zscores_interaction_ranked.csv"), row.names = FALSE)
}
publish_zscores <- function(df, out_dir) {
zscores <- df %>%
mutate(
zscore_L = scale(L, center = TRUE, scale = TRUE),
zscore_K = scale(K, center = TRUE, scale = TRUE),
zscore_r = scale(r, center = TRUE, scale = TRUE),
zscore_AUC = scale(AUC, center = TRUE, scale = TRUE)
)
write.csv(zscores, file.path(out_dir, "zscores_interaction.csv"), row.names = FALSE)
}
generate_and_publish_qc <- function(df, delta_bg_tolerance, out_dir_qc) {
variables <- c("L", "K", "r", "AUC", "delta_bg")
lapply(variables, create_and_publish_plot, df = df, plot_type = "scatter", out_dir_qc = out_dir_qc)
delta_bg_above_tolerance <- filter(df, delta_bg >= delta_bg_tolerance)
lapply(variables, create_and_publish_plot, df = delta_bg_above_tolerance,
plot_type = "scatter", out_dir_qc = out_dir_qc, suffix = "_above_tolerance")
}
process_exp_dir <- function(exp_dir, exp_name, genes, easy_results_file) {
out_dir <- file.path(exp_dir, "zscores")
out_dir_qc <- file.path(exp_dir, "qc")
dir.create(out_dir, showWarnings = FALSE)
dir.create(out_dir_qc, showWarnings = FALSE)
df <- load_and_preprocess_data(easy_results_file, genes)
delta_bg_tolerance <- mean(df$delta_bg, na.rm = TRUE) + 3 * sd(df$delta_bg, na.rm = TRUE)
generate_and_publish_qc(df, delta_bg_tolerance, out_dir_qc)
variables <- c("L", "K", "r", "AUC", "delta_bg")
publish_summary_stats(df, variables, out_dir)
publish_interaction_scores(df, out_dir)
publish_zscores(df, out_dir)
list(
zscores_file = file.path(out_dir, "zscores_interaction.csv"),
exp_name = exp_name
)
}
processed_experiments <- lapply(names(args$experiments), function(exp_name) {
process_exp_dir(args$experiments[[exp_name]], exp_name, genes, args$easy_results_file)
})
zscores_files <- sapply(processed_experiments, `[[`, "zscores_file")
exp_names <- sapply(processed_experiments, `[[`, "exp_name")
# Sequentially join z-scores similar to original logic
combine_zscores <- function(zscores_files) {
if (length(zscores_files) < 2) stop("Not enough experiments to compare, exiting script")
if (length(zscores_files) == 2) {
X1 <- read.csv(file = zscores_files[1], stringsAsFactors = FALSE)
X2 <- read.csv(file = zscores_files[2], stringsAsFactors = FALSE)
X <- plyr::join(X1, X2, by = "OrfRep", type = "full")
} else if (length(zscores_files) == 3) {
X1 <- read.csv(file = zscores_files[1], stringsAsFactors = FALSE)
X2 <- read.csv(file = zscores_files[2], stringsAsFactors = FALSE)
X3 <- read.csv(file = zscores_files[3], stringsAsFactors = FALSE)
X <- plyr::join(X1, X2, by = "OrfRep", type = "full")
X <- plyr::join(X, X3, by = "OrfRep", type = "full")
} else if (length(zscores_files) == 4) {
X1 <- read.csv(file = zscores_files[1], stringsAsFactors = FALSE)
X2 <- read.csv(file = zscores_files[2], stringsAsFactors = FALSE)
X3 <- read.csv(file = zscores_files[3], stringsAsFactors = FALSE)
X4 <- read.csv(file = zscores_files[4], stringsAsFactors = FALSE)
X <- plyr::join(X1, X2, by = "OrfRep", type = "full")
X <- plyr::join(X, X3, by = "OrfRep", type = "full")
X <- plyr::join(X, X4, by = "OrfRep", type = "full")
}
X
}
process_combined_zscores <- function(joined_data, sd, out_dir, exp_names) {
OBH <- joined_data[, order(colnames(joined_data))] # OrderByHeader
headSel <- OBH %>%
select(contains('OrfRep'), matches('Gene'), contains('Z_lm_K'), contains('Z_Shift_K'), contains('Z_lm_L'), contains('Z_Shift_L')) %>%
select(-contains("Gene."))
headSel2 <- OBH %>%
select(contains('OrfRep'), matches('Gene')) %>%
select(-contains("Gene."))
# Fill NA values
fill_na_in_columns <- function(data) {
for (header in colnames(data)) {
if (nrow(data) == 0) {
warning(paste("Empty data frame for column:", header))
}
if (grepl("Shift", header)) {
data[[header]][is.na(data[[header]])] <- 0.001
} else if (grepl("Z_lm_", header)) {
data[[header]][is.na(data[[header]])] <- 0.0001
}
}
data
}
headSel <- fill_na_in_columns(headSel)
# Filter by standard deviation
filter_by_sd <- function(data, sd) {
if (sd == 0) return(data)
z_lm_cols <- select(data, contains("z_lm_"))
filter_vector <- rowSums(abs(z_lm_cols) >= sd) > 0
data[filter_vector, ]
}
REMcRdy <- filter_by_sd(select(headSel, contains("OrfRep"), matches("Gene"), contains("z_lm_")), sd)
shiftOnly <- filter_by_sd(select(headSel, contains("OrfRep"), matches("Gene"), contains("z_shift")), sd)
# Reorder columns
reorder_columns <- function(data1, data2) {
combined_data <- data1
for (i in 3:ncol(data1)) {
if (nrow(data1) == 0 | nrow(data2) == 0) {
next
}
combined_data <- cbind(combined_data, data2[i], data1[i])
}
combined_data
}
combI <- reorder_columns(headSel2, shiftOnly)
write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
# Relabel headers
relabel_headers <- function(headers, exp_names) {
new_labels <- headers
for (i in seq_along(headers)) {
suffix <- sub("^.*\\.(\\d+)$", "\\1", headers[i])
if (suffix %in% seq_along(exp_names)) {
exp_name <- exp_names[as.numeric(suffix)]
new_labels[i] <- gsub(paste0(".", suffix), paste0("_", exp_name), headers[i])
}
}
new_labels
}
colnames(shiftOnly) <- relabel_headers(colnames(shiftOnly), exp_names)
colnames(REMcRdy) <- relabel_headers(colnames(REMcRdy), exp_names)
write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
}
joined_data <- combine_zscores(zscores_files)
process_combined_zscores(joined_data, args$sd, args$out_dir, exp_names)

View File

@@ -0,0 +1,250 @@
suppressMessages({
library("ggplot2")
library("plotly")
library("htmlwidgets")
library("dplyr")
library("ggthemes")
})
# Constants for configuration
PLOT_WIDTH <- 14
PLOT_HEIGHT <- 9
BASE_SIZE <- 14
# BACKUP_SUFFIX <- paste0(".backup_", Sys.Date())
options(warn = 2, max.print = 100)
# Parse arguments
parse_arguments <- function() {
if (interactive()) {
args <- c(
"1",
"3",
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/StudyInfo.csv",
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/apps/r/SGD_features.tab",
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/easy/20240116_jhartman2/results_std.txt",
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/Exp1/zscores"
)
} else {
args <- commandArgs(trailingOnly = TRUE)
}
list(
exp_number = as.numeric(args[1]),
delta_bg_factor = as.numeric(args[2]),
study_info_file = normalizePath(file.path(args[3]), mustWork = FALSE),
sgd_gene_list = normalizePath(file.path(args[4]), mustWork = FALSE),
input_file = normalizePath(file.path(args[5]), mustWork = FALSE),
out_dir = normalizePath(file.path(args[6]), mustWork = FALSE),
out_dir_qc = normalizePath(file.path(args[6], "qc"), mustWork = FALSE)
)
}
args <- parse_arguments()
dir.create(args$out_dir, showWarnings = FALSE)
dir.create(args$out_dir_qc, showWarnings = FALSE)
# Define themes and scales
theme_publication <- function(base_size = BASE_SIZE, base_family = "sans") {
theme_foundation(base_size = base_size, base_family = base_family) +
theme(
plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
text = element_text(),
panel.background = element_rect(colour = NA),
plot.background = element_rect(colour = NA),
panel.border = element_rect(colour = NA),
axis.title = element_text(face = "bold", size = rel(1)),
axis.title.y = element_text(angle = 90, vjust = 2),
axis.title.x = element_text(vjust = -0.2),
axis.line = element_line(colour = "black"),
panel.grid.major = element_line(colour = "#f0f0f0"),
panel.grid.minor = element_blank(),
legend.key = element_rect(colour = NA),
legend.position = "bottom",
legend.direction = "horizontal",
plot.margin = unit(c(10, 5, 5, 5), "mm"),
strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
strip.text = element_text(face = "bold")
)
}
theme_publication_legend_right <- function(base_size = BASE_SIZE, base_family = "sans") {
theme_publication(base_size, base_family) +
theme(
legend.position = "right",
legend.direction = "vertical",
legend.key.size = unit(0.5, "cm"),
legend.spacing = unit(0, "cm"),
legend.title = element_text(face = "italic")
)
}
scale_fill_publication <- function(...) {
discrete_scale("fill", "Publication", manual_pal(values = c(
"#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
"#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
)), ...)
}
scale_colour_publication <- function(...) {
discrete_scale("colour", "Publication", manual_pal(values = c(
"#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
"#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
)), ...)
}
# Load and preprocess data
load_and_preprocess_data <- function(input_file, genes) {
df <- tryCatch({
read.delim(input_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE)
}, error = function(e) {
stop("Error reading file: ", input_file, "\n", e$message)
})
# Filter out rows where the first column is empty or "Scan"
df <- df[!df[[1]] %in% c("", "Scan"), ]
# Convert specific columns to numeric
numeric_columns <- c("Col", "Row", "l", "K", "r", "Scan", "AUC96", "LstBackgrd", "X1stBackgrd")
df[numeric_columns[numeric_columns %in% colnames(df)]] <- lapply(df[numeric_columns[numeric_columns %in% colnames(df)]], as.numeric)
# Handle missing critical columns with warnings
df$L <- if ("l" %in% colnames(df)) df$l else {warning("Missing column: l"); NA}
df$AUC <- if ("AUC96" %in% colnames(df)) df$AUC96 else {warning("Missing column: AUC96"); NA}
df$conc_num <- if ("Conc" %in% colnames(df)) as.numeric(gsub("[^0-9\\.]", "", df$Conc)) else NA
df$delta_bg <- if (all(c("X1stBackgrd", "LstBackgrd") %in% colnames(df))) df$LstBackgrd - df$X1stBackgrd else {warning("Missing columns for delta_bg calculation"); NA}
# Replace ORF IDs with Gene Names using the SGD gene list
df$GeneName <- vapply(df$ORF, function(orf) {
gene_name <- genes %>% filter(ORF == orf) %>% pull(GeneName)
ifelse(is.null(gene_name) || gene_name == "" || length(gene_name) == 0, orf, gene_name)
}, character(1))
if (nrow(df) == 0) warning("Dataframe is empty after filtering")
return(df)
}
df <- load_and_preprocess_data(args$input_file)
delta_bg_tolerance <- mean(df$delta_bg, na.rm = TRUE) + args$delta_bg_factor * sd(df$delta_bg, na.rm = TRUE)
message("Exp", args$exp_number, ": The delta_bg_factor is: ", args$delta_bg_factor)
message("Exp", args$exp_number, ": The delta_bg_tolerance is ", delta_bg_tolerance)
df <- df %>%
mutate(OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep)) %>%
filter(!Gene %in% c("BLANK", "Blank", "blank"), Drug != "BMH21")
# Create plot
create_plot <- function(df, var, plot_type) {
filtered_df <- df %>% filter(is.finite(.data[[var]]))
p <- ggplot(filtered_df, aes(Scan, .data[[var]], color = as.factor(conc_num))) +
ggtitle(paste("Plate analysis by Drug Conc for", var, "before quality control")) +
theme_publication()
if (plot_type == "scatter") {
p <- p +
geom_point(shape = 3, size = 0.2, position = "jitter") +
stat_summary(fun = mean, geom = "point", size = 0.6) +
stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), geom = "errorbar")
} else {
p <- p + geom_boxplot()
}
return(p)
}
# Publish plot to PDF and HTML (Plotly)
publish_plot <- function(plot, plot_path) {
# if (file.exists(plot_path)) {
# file.rename(plot_path, paste0(plot_path, BACKUP_SUFFIX))
# }
pdf(plot_path, width = PLOT_WIDTH, height = PLOT_HEIGHT)
print(plot)
dev.off()
pgg <- suppressWarnings(ggplotly(plot,
tooltip = c("L", "K", "ORF", "Gene", "delta_bg")) %>%
layout(legend = list(orientation = "h")))
saveWidget(pgg, sub(".pdf$", ".html", plot_path), selfcontained = TRUE)
}
# Publish multiple plots
publish_multiple_plots <- function(df, variables, plot_type, out_dir_qc, suffix = "") {
lapply(variables, function(var) {
plot <- create_plot(df, var, plot_type)
publish_plot(plot, file.path(out_dir_qc, paste0("plate_analysis_", var, suffix, ".pdf")))
})
}
# Calculate and publish summary statistics
publish_summary_stats <- function(df, variables, out_dir) {
stats_list <- lapply(variables, function(var) {
df %>%
group_by(OrfRep, conc_num) %>%
summarize(
N = n(),
mean_val = mean(.data[[var]], na.rm = TRUE),
sd_val = sd(.data[[var]], na.rm = TRUE),
se_val = sd_val / sqrt(N)
)
})
summary_stats_df <- bind_rows(stats_list, .id = "variable")
write.csv(summary_stats_df, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
}
# Calculate and publish interaction scores
publish_interaction_scores <- function(df, out_dir) {
interaction_scores <- df %>%
group_by(OrfRep) %>%
summarize(
mean_L = mean(L, na.rm = TRUE),
mean_K = mean(K, na.rm = TRUE),
mean_r = mean(r, na.rm = TRUE),
mean_AUC = mean(AUC, na.rm = TRUE),
delta_bg_mean = mean(delta_bg, na.rm = TRUE),
delta_bg_sd = sd(delta_bg, na.rm = TRUE)
) %>%
mutate(
l_rank = rank(mean_L),
k_rank = rank(mean_K),
r_rank = rank(mean_r),
auc_rank = rank(mean_AUC)
)
write.csv(interaction_scores, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE)
write.csv(interaction_scores %>%
arrange(l_rank, k_rank), file.path(out_dir, "rf_zscores_interaction_ranked.csv"), row.names = FALSE)
}
# Publish z-scores
publish_zscores <- function(df, out_dir) {
zscores <- df %>%
mutate(
zscore_L = scale(L, center = TRUE, scale = TRUE),
zscore_K = scale(K, center = TRUE, scale = TRUE),
zscore_r = scale(r, center = TRUE, scale = TRUE),
zscore_AUC = scale(AUC, center = TRUE, scale = TRUE)
)
write.csv(zscores, file.path(out_dir, "zscores_interaction.csv"), row.names = FALSE)
}
# QC plot generation and publication
generate_and_publish_qc <- function(df, delta_bg_tolerance, out_dir_qc) {
publish_multiple_plots(df, c("L", "K", "r", "AUC", "delta_bg"), "scatter", out_dir_qc)
delta_bg_above_tolerance <- df[df$delta_bg >= delta_bg_tolerance, ]
publish_multiple_plots(delta_bg_above_tolerance, c("L", "K", "r", "AUC", "delta_bg"), "scatter", out_dir_qc, "_above_tolerance")
}
# Run QC plot generation and publication
generate_and_publish_qc(df, delta_bg_tolerance, args$out_dir_qc)
# Run summary statistics, interaction score, and z-score calculations and publications
variables <- c("L", "K", "r", "AUC", "delta_bg")
publish_summary_stats(df, variables, args$out_dir)
publish_interaction_scores(df, args$out_dir)
publish_zscores(df, args$out_dir)