Rollup for calculation_interaction_zscores refactor

This commit is contained in:
2024-09-01 03:18:42 -04:00
parent 05cd1a85d8
commit a484a2b104
495 changed files with 412 additions and 6011712 deletions

View File

@@ -7,13 +7,13 @@ suppressMessages({
library(data.table)
})
options(warn = 2, max.print = 1000)
# Constants for configuration
PLOT_WIDTH <- 14
PLOT_HEIGHT <- 9
BASE_SIZE <- 14
options(warn = 2, max.print = 100)
parse_arguments <- function() {
args <- if (interactive()) {
c(
@@ -29,11 +29,11 @@ parse_arguments <- function() {
} else {
commandArgs(trailingOnly = TRUE)
}
paths <- normalizePath(file.path(args[seq(5, length(args), by = 2)]), mustWork = FALSE)
names <- args[seq(6, length(args), by = 2)]
experiments <- setNames(paths, names)
list(
out_dir = normalizePath(file.path(args[1]), mustWork = FALSE),
sd = as.numeric(args[2]),
@@ -49,7 +49,7 @@ dir.create(file.path(args$out_dir, "zscores"), showWarnings = FALSE)
dir.create(file.path(args$out_dir, "zscores", "qc"), showWarnings = FALSE)
# Define themes and scales
theme_publication <- function(base_size = BASE_SIZE, base_family = "sans") {
theme_publication <- function(base_size = BASE_SIZE, base_family = "sans", legend_position = "bottom") {
theme_foundation(base_size = base_size, base_family = base_family) +
theme(
plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
@@ -64,25 +64,14 @@ theme_publication <- function(base_size = BASE_SIZE, base_family = "sans") {
panel.grid.major = element_line(colour = "#f0f0f0"),
panel.grid.minor = element_blank(),
legend.key = element_rect(colour = NA),
legend.position = "bottom",
legend.direction = "horizontal",
legend.position = legend_position,
legend.direction = ifelse(legend_position == "right", "vertical", "horizontal"),
plot.margin = unit(c(10, 5, 5, 5), "mm"),
strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
strip.text = element_text(face = "bold")
)
}
theme_publication_legend_right <- function(base_size = BASE_SIZE, base_family = "sans") {
theme_publication(base_size, base_family) +
theme(
legend.position = "right",
legend.direction = "vertical",
legend.key.size = unit(0.5, "cm"),
legend.spacing = unit(0, "cm"),
legend.title = element_text(face = "italic")
)
}
scale_fill_publication <- function(...) {
discrete_scale("fill", "Publication", manual_pal(values = c(
"#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
@@ -100,58 +89,91 @@ scale_colour_publication <- function(...) {
# Load SGD gene list
sgd_genes <- function(sgd_gene_list) {
read.delim(file = sgd_gene_list, quote = "", header = FALSE,
colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11))) %>%
colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11))) %>%
dplyr::rename(ORF = V4, GeneName = V5)
}
genes <- sgd_genes(args$sgd_gene_list)
# Define the adjust_values function
adjust_values <- function(x, delta_bg, tolerance) {
valid_x <- x[x != 0 & !is.na(x)]
max_x <- ifelse(length(valid_x) == 0, NA_real_, max(valid_x, na.rm = TRUE))
x_adjusted <- ifelse(x == 0 & !is.na(x), max_x, x)
# Adjust only if delta_bg is valid and above tolerance
x_adjusted <- ifelse(!is.na(delta_bg) & delta_bg >= tolerance, NA_real_, x_adjusted)
return(x_adjusted)
}
load_and_preprocess_data <- function(easy_results_file, genes) {
df <- tryCatch({
df <-
read.delim(easy_results_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE)
}, error = function(e) {
stop("Error reading file: ", easy_results_file, "\n", e$message)
})
# Convert specific columns to numeric
numeric_columns <- c("Col", "Row", "l", "K", "r", "Scan", "AUC96", "LstBackgrd", "X1stBackgrd")
df[numeric_columns[numeric_columns %in% colnames(df)]] <-
suppressWarnings(lapply(df[numeric_columns[numeric_columns %in% colnames(df)]], as.numeric))
# Apply initial filters and mutating steps as in the original script
# Apply further transformations
df <- df %>%
mutate(L = if ("l" %in% colnames(.)) l else {warning("Missing column: l"); NA}) %>%
mutate(AUC = if ("AUC96" %in% colnames(.)) AUC96 else {warning("Missing column: AUC96"); NA}) %>%
filter(!.[[1]] %in% c("", "Scan")) %>% # Filter out empty or Scan rows
filter(Gene != "BLANK" & Gene != "Blank" & ORF != "Blank" & Gene != "blank") %>% # Remove blank genes
filter(Drug != "BMH21") %>% # Filter out specific drugs if necessary
filter(!is.na(ORF) & ORF != "") %>% # Ensure ORF is not NA or empty
mutate(OrfRep = ifelse(ORF == "YDL227C", "YDL227C", OrfRep)) %>%
mutate(conc_num = as.numeric(gsub("[^0-9\\.]", "", Conc))) %>% # Extract numeric drug concentrations
filter(!is.na(LstBackgrd) & !is.na(X1stBackgrd)) %>% # Ensure finite background values
mutate(delta_bg = LstBackgrd - X1stBackgrd) %>% # Calculate delta background
filter(!is.na(L) & !is.na(K) & !is.na(r) & !is.na(AUC96)) %>% # Ensure finite measurements
mutate(GeneName = vapply(ORF, function(orf) {
gene_name <- genes %>% filter(ORF == orf) %>% pull(GeneName)
ifelse(is.null(gene_name) || gene_name == "" || length(gene_name) == 0, orf, gene_name)
}, character(1)))
filter(
!is.na(ORF) & ORF != "",
!ORF %in% c("Blank"),
!is.na(Gene) & !Gene %in% c("BLANK", "Blank", "blank"),
Drug != "BMH21",
!is.na(Scan),
!is.na(l), !is.na(K),
!is.na(r), !is.na(AUC96)
) %>%
mutate(
Col = suppressWarnings(as.numeric(Col)),
Row = suppressWarnings(as.numeric(Row)),
L = suppressWarnings(as.numeric(l)),
K = suppressWarnings(as.numeric(K)),
r = suppressWarnings(as.numeric(r)),
Scan = suppressWarnings(as.numeric(Scan)),
AUC = suppressWarnings(as.numeric(AUC96)),
LstBackgrd = suppressWarnings(as.numeric(LstBackgrd)),
X1stBackgrd = suppressWarnings(as.numeric(X1stBackgrd)),
delta_bg = ifelse(is.na(LstBackgrd) | is.na(X1stBackgrd), NA_real_, LstBackgrd - X1stBackgrd),
delta_bg_tolerance = mean(delta_bg, na.rm = TRUE) + 3 * sd(delta_bg, na.rm = TRUE),
OrfRep = ifelse(ORF == "YDL227C", "YDL227C", OrfRep),
OrfRepUnique = paste(OrfRep, Gene, Num., sep = "_"),
conc_num = suppressWarnings(as.numeric(gsub("[^0-9\\.]", "", Conc))),
DB = as.integer(delta_bg >= delta_bg_tolerance),
SM = 0,
NG = as.integer(l == 0 & !is.na(l)),
L_adjusted = adjust_values(L, delta_bg, delta_bg_tolerance),
r_adjusted = adjust_values(r, delta_bg, delta_bg_tolerance),
AUC_adjusted = adjust_values(AUC, delta_bg, delta_bg_tolerance),
K_adjusted = adjust_values(K, delta_bg, delta_bg_tolerance),
max_L_adjusted = ifelse(all(is.na(L_adjusted)), NA_real_, max(L_adjusted, na.rm = TRUE)),
SM = ifelse(!is.na(max_L_adjusted) & L_adjusted == max_L_adjusted, 1, SM)
)
if (nrow(df) == 0) warning("Dataframe is empty after filtering")
# Efficiently handle the assignment of Gene names
gene_map <- setNames(genes$GeneName, genes$ORF)
df$Gene <- ifelse(df$OrfRep != "YDL227C", gene_map[df$ORF], df$Gene)
df$Gene <- ifelse(df$Gene == "" | df$Gene == "OCT1", df$OrfRep, df$Gene)
return(df)
}
create_and_publish_plot <- function(df, var, plot_type, out_dir_qc, suffix = "") {
if (!("Scan" %in% colnames(df))) {
warning("'Scan' column is not present in the data. Skipping this plot.")
return(NULL)
}
plot_func <- if (plot_type == "scatter") geom_point else geom_boxplot
filtered_df <- df[is.finite(df[[var]]), ]
p <- ggplot(filtered_df, aes(Scan, !!sym(var), color = as.factor(conc_num))) +
plot_func(shape = 3, size = 0.2, position = "jitter") +
generate_plot <- function(df, var, plot_type = "scatter", out_dir_qc, suffix = "") {
# Filter out non-finite values before plotting
df <- df %>%
filter(is.finite(!!sym(var)))
plot_func <- switch(plot_type,
scatter = geom_point(shape = 3, size = 0.2, position = "jitter"),
boxplot = geom_boxplot())
p <- ggplot(df, aes(Scan, !!sym(var), color = as.factor(conc_num))) +
plot_func +
stat_summary(fun = mean, geom = "point", size = 0.6) +
stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), geom = "errorbar") +
ggtitle(paste("Plate analysis by Drug Conc for", var, "before quality control")) +
@@ -161,184 +183,233 @@ create_and_publish_plot <- function(df, var, plot_type, out_dir_qc, suffix = "")
pdf(pdf_path, width = PLOT_WIDTH, height = PLOT_HEIGHT)
print(p)
dev.off()
html_path <- sub(".pdf$", ".html", pdf_path)
pgg <- suppressWarnings(ggplotly(p, tooltip = c("L", "K", "ORF", "Gene", "delta_bg", "GeneName")) %>%
pgg <- suppressWarnings(ggplotly(p) %>%
layout(legend = list(orientation = "h")))
saveWidget(pgg, html_path, selfcontained = TRUE)
}
publish_summary_stats <- function(df, variables, out_dir) {
stats_list <- lapply(variables, function(var) {
df %>%
dplyr::group_by(OrfRep, conc_num) %>%
dplyr::summarize(
N = dplyr::n(),
mean_val = mean(.data[[var]], na.rm = TRUE),
sd_val = sd(.data[[var]], na.rm = TRUE),
se_val = sd_val / sqrt(N)
)
})
summary_stats_df <- dplyr::bind_rows(stats_list, .id = "variable")
fwrite(summary_stats_df, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
}
# Compute Interaction Scores
compute_interaction_scores <- function(df) {
# Calculate raw shifts
raw_shifts <- df %>%
group_by(OrfRep) %>%
reframe(
Raw_Shift_L = mean(L, na.rm = TRUE),
Raw_Shift_K = mean(K, na.rm = TRUE),
Raw_Shift_r = mean(r, na.rm = TRUE),
Raw_Shift_AUC = mean(AUC, na.rm = TRUE),
NG = n()
)
# Calculate Z-scores
z_scores <- df %>%
group_by(OrfRep) %>%
reframe(
Z_Shift_L = mean(scale(L, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
Z_Shift_K = mean(scale(K, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
Z_Shift_r = mean(scale(r, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
Z_Shift_AUC = mean(scale(AUC, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE)
)
# Linear Model Scores and R-Squared
lm_scores <- df %>%
group_by(OrfRep) %>%
reframe(
lm_Score_L = coef(lm(L ~ delta_bg, data = .))[2],
R_Squared_L = summary(lm(L ~ delta_bg, data = .))$r.squared,
lm_Score_K = coef(lm(K ~ delta_bg, data = .))[2],
R_Squared_K = summary(lm(K ~ delta_bg, data = .))$r.squared,
lm_Score_r = coef(lm(r ~ delta_bg, data = .))[2],
R_Squared_r = summary(lm(r ~ delta_bg, data = .))$r.squared,
lm_Score_AUC = coef(lm(AUC ~ delta_bg, data = .))[2],
R_Squared_AUC = summary(lm(AUC ~ delta_bg, data = .))$r.squared
)
# Calculate Sum and Average Z-scores
sum_avg_z_scores <- df %>%
group_by(OrfRep) %>%
reframe(
Sum_Z_Score_L = sum(scale(L, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
Avg_Zscore_L = mean(scale(L, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
Sum_Z_Score_K = sum(scale(K, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
Avg_Zscore_K = mean(scale(K, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
Sum_Z_Score_r = sum(scale(r, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
Avg_Zscore_r = mean(scale(r, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
Sum_Z_Score_AUC = sum(scale(AUC, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
Avg_Zscore_AUC = mean(scale(AUC, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE)
)
# Combine all calculations
interaction_scores <- raw_shifts %>%
left_join(z_scores, by = "OrfRep") %>%
left_join(lm_scores, by = "OrfRep") %>%
left_join(sum_avg_z_scores, by = "OrfRep") %>%
mutate(
l_rank = rank(-Raw_Shift_L),
k_rank = rank(-Raw_Shift_K),
r_rank = rank(-Raw_Shift_r),
auc_rank = rank(-Raw_Shift_AUC)
)
return(interaction_scores)
}
publish_interaction_scores <- function(df, out_dir) {
interaction_scores <- compute_interaction_scores(df)
# Additional enhancer and suppressor calculations and outputs
deletion_enhancers_L <- interaction_scores[interaction_scores$Raw_Shift_L >= 2, ]
deletion_enhancers_L <- deletion_enhancers_L[!is.na(deletion_enhancers_L$OrfRep), ]
deletion_enhancers_K <- interaction_scores[interaction_scores$Raw_Shift_K <= -2, ]
deletion_enhancers_K <- deletion_enhancers_K[!is.na(deletion_enhancers_K$OrfRep), ]
deletion_suppressors_L <- interaction_scores[interaction_scores$Raw_Shift_L <= -2, ]
deletion_suppressors_L <- deletion_suppressors_L[!is.na(deletion_suppressors_L$OrfRep), ]
deletion_suppressors_K <- interaction_scores[interaction_scores$Raw_Shift_K >= 2, ]
deletion_suppressors_K <- deletion_suppressors_K[!is.na(deletion_suppressors_K$OrfRep), ]
deletion_enhancers_and_suppressors_L <- interaction_scores[
interaction_scores$Raw_Shift_L >= 2 | interaction_scores$Raw_Shift_L <= -2, ]
deletion_enhancers_and_suppressors_L <- deletion_enhancers_and_suppressors_L[
!is.na(deletion_enhancers_and_suppressors_L$OrfRep), ]
deletion_enhancers_and_suppressors_K <- interaction_scores[
interaction_scores$Raw_Shift_K >= 2 | interaction_scores$Raw_Shift_K <= -2, ]
deletion_enhancers_and_suppressors_K <- deletion_enhancers_and_suppressors_K[
!is.na(deletion_enhancers_and_suppressors_K$OrfRep), ]
# Write CSV files with updated filenames
fwrite(interaction_scores, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE)
fwrite(dplyr::arrange(interaction_scores, l_rank, k_rank),
file.path(out_dir, "rf_zscores_interaction_ranked.csv"), row.names = FALSE)
fwrite(deletion_enhancers_L, file.path(out_dir, "deletion_enhancers_l.csv"), row.names = FALSE)
fwrite(deletion_enhancers_K, file.path(out_dir, "deletion_enhancers_k.csv"), row.names = FALSE)
fwrite(deletion_suppressors_L, file.path(out_dir, "deletion_suppressors_l.csv"), row.names = FALSE)
fwrite(deletion_suppressors_K, file.path(out_dir, "deletion_suppressors_k.csv"), row.names = FALSE)
fwrite(deletion_enhancers_and_suppressors_L, file.path(out_dir, "deletion_enhancers_and_suppressors_l.csv"), row.names = FALSE)
fwrite(deletion_enhancers_and_suppressors_K, file.path(out_dir, "deletion_enhancers_and_suppressors_k.csv"), row.names = FALSE)
return(interaction_scores) # Return the updated data frame with all calculated columns
}
publish_zscores <- function(df, out_dir) {
zscores <- df %>%
dplyr::mutate(
zscore_L = scale(Raw_Shift_L, center = TRUE, scale = TRUE),
zscore_K = scale(Raw_Shift_K, center = TRUE, scale = TRUE),
zscore_r = scale(Raw_Shift_r, center = TRUE, scale = TRUE),
zscore_AUC = scale(Raw_Shift_AUC, center = TRUE, scale = TRUE)
)
fwrite(zscores, file.path(out_dir, "zscores_interaction.csv"), row.names = FALSE)
}
generate_and_publish_qc <- function(df, delta_bg_tolerance, out_dir_qc) {
generate_and_publish_qc <- function(df, out_dir_qc) {
variables <- c("L", "K", "r", "AUC", "delta_bg")
# Pre-QC plots
lapply(variables, function(var) {
# Access delta_bg_tolerance from the dataframe
delta_bg_tolerance <- df$delta_bg_tolerance[1]
for (var in variables) {
if (var %in% colnames(df)) {
create_and_publish_plot(df, var, "scatter", out_dir_qc)
generate_plot(df, var, "scatter", out_dir_qc)
}
})
}
# Filter data based on delta background tolerance for Post-QC analysis
df_post_qc <- df %>%
mutate(across(all_of(variables),
~ ifelse(delta_bg >= delta_bg_tolerance, NA, .)))
mutate(across(all_of(variables), ~ ifelse(delta_bg >= delta_bg_tolerance, NA, .)))
# Post-QC plots
lapply(variables, function(var) {
for (var in variables) {
if (var %in% colnames(df_post_qc)) {
create_and_publish_plot(df_post_qc, var, "scatter", out_dir_qc, suffix = "_after_qc")
generate_plot(df_post_qc, var, "scatter", out_dir_qc, suffix = "_after_qc")
}
})
}
# For plots specifically for data above the tolerance threshold
delta_bg_above_tolerance <- df[df$delta_bg >= delta_bg_tolerance, ]
lapply(variables, function(var) {
for (var in variables) {
if (var %in% colnames(delta_bg_above_tolerance)) {
create_and_publish_plot(delta_bg_above_tolerance, var, "scatter", out_dir_qc, suffix = "_above_tolerance")
generate_plot(delta_bg_above_tolerance, var, "scatter", out_dir_qc, suffix = "_above_tolerance")
}
})
}
df_finite <- df %>% filter(is.finite(delta_bg))
delta_bg_frequency_plot <- ggplot(df_finite, aes(delta_bg, color = as.factor(conc_num))) +
geom_density() +
ggtitle("Density plot for Delta Background by Conc All Data") +
theme_publication(legend_position = "right")
delta_bg_bar_plot <- ggplot(df_finite, aes(delta_bg, color = as.factor(conc_num))) +
geom_bar() +
ggtitle("Bar plot for Delta Background by Conc All Data") +
theme_publication(legend_position = "right")
pdf(file = file.path(out_dir_qc, "frequency_delta_background.pdf"), width = 12, height = 8)
print(delta_bg_frequency_plot)
print(delta_bg_bar_plot)
dev.off()
saveWidget(ggplotly(delta_bg_frequency_plot),
file = file.path(out_dir_qc, "frequency_delta_background_density.html"), selfcontained = TRUE)
saveWidget(ggplotly(delta_bg_bar_plot),
file = file.path(out_dir_qc, "frequency_delta_background_bar.html"), selfcontained = TRUE)
df_no_zero <- df %>% filter(L > 0 & is.finite(L))
scatter_plots <- list()
box_plots <- list()
for (var in variables) {
if (var %in% colnames(df_no_zero)) {
scatter_plots[[var]] <- ggplot(df_no_zero, aes(Scan, .data[[var]], color = as.factor(conc_num))) +
geom_point(shape = 3, size = 0.2) +
stat_summary(fun = mean, geom = "point", size = 0.6) +
stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), geom = "errorbar") +
ggtitle(paste("Plate analysis by Drug Conc for", var, "after quality control")) +
theme_publication()
box_plots[[var]] <- ggplot(df_no_zero, aes(as.factor(Scan), .data[[var]], color = as.factor(conc_num))) +
geom_boxplot() +
ggtitle(paste("Plate analysis by Drug Conc for", var, "after quality control")) +
theme_publication()
}
}
pdf(file = file.path(out_dir_qc, "plate_analysis_no_zeros.pdf"), width = 14, height = 9)
lapply(scatter_plots, print)
dev.off()
pdf(file = file.path(out_dir_qc, "plate_analysis_no_zeros_boxplots.pdf"), width = 18, height = 9)
lapply(box_plots, print)
dev.off()
for (var in names(scatter_plots)) {
html_path <- file.path(out_dir_qc, paste0("plate_analysis_no_zeros_", var, ".html"))
pgg <- suppressWarnings(ggplotly(scatter_plots[[var]]) %>%
layout(legend = list(orientation = "h")))
saveWidget(pgg, html_path, selfcontained = TRUE)
}
for (var in names(box_plots)) {
html_path <- file.path(out_dir_qc, paste0("plate_analysis_no_zeros_boxplots_", var, ".html"))
pgg <- suppressWarnings(ggplotly(box_plots[[var]]) %>%
layout(legend = list(orientation = "h")))
saveWidget(pgg, html_path, selfcontained = TRUE)
}
}
create_rank_plots <- function(interaction_scores, out_dir) {
compute_experiment_summary_stats <- function(df, out_dir, background_strain = NULL) {
if (!is.null(background_strain)) {
df <- df %>% filter(OrfRep == background_strain)
}
summary_stats <- df %>%
group_by(OrfRep, conc_num) %>%
summarize(
N = n(),
mean_L = mean(L, na.rm = TRUE),
median_L = median(L, na.rm = TRUE),
max_L = max(L, na.rm = TRUE),
min_L = min(L, na.rm = TRUE),
sd_L = sd(L, na.rm = TRUE),
se_L = sd_L / sqrt(N),
mean_K = mean(K, na.rm = TRUE),
median_K = median(K, na.rm = TRUE),
max_K = max(K, na.rm = TRUE),
min_K = min(K, na.rm = TRUE),
sd_K = sd(K, na.rm = TRUE),
se_K = sd_K / sqrt(N),
mean_r = mean(r, na.rm = TRUE),
median_r = median(r, na.rm = TRUE),
max_r = max(r, na.rm = TRUE),
min_r = min(r, na.rm = TRUE),
sd_r = sd(r, na.rm = TRUE),
se_r = sd_r / sqrt(N),
mean_AUC = mean(AUC, na.rm = TRUE),
median_AUC = median(AUC, na.rm = TRUE),
max_AUC = max(AUC, na.rm = TRUE),
min_AUC = min(AUC, na.rm = TRUE),
sd_AUC = sd(AUC, na.rm = TRUE),
se_AUC = sd_AUC / sqrt(N)
)
if (is.null(background_strain)) {
fwrite(summary_stats, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
} else {
fwrite(summary_stats, file.path(out_dir, "summary_stats_background_strains.csv"), row.names = FALSE)
}
}
filter_and_calculate_sd_k_stats <- function(df, out_dir_qc) {
df_cleaned <- df %>%
filter(is.finite(L_adjusted) & is.finite(K_adjusted))
df_within_2SD_K <- df_cleaned %>%
filter(abs(K_adjusted - mean(K_adjusted, na.rm = TRUE)) <= 2 * sd(K_adjusted, na.rm = TRUE)) %>%
group_by(conc_num) %>%
summarize(
N = n(),
mean_L = mean(L_adjusted, na.rm = TRUE),
max_L = ifelse(all(is.na(L_adjusted)), NA_real_, max(L_adjusted, na.rm = TRUE)),
sd_L = sd(L_adjusted, na.rm = TRUE)
)
fwrite(df_within_2SD_K, file.path(out_dir_qc, "max_observed_L_vals_for_spots_within_2SD_K.csv"))
df_outside_2SD_K <- df_cleaned %>%
filter(abs(K_adjusted - mean(K_adjusted, na.rm = TRUE)) > 2 * sd(K_adjusted, na.rm = TRUE)) %>%
group_by(conc_num) %>%
summarize(
N = n(),
mean_L = mean(L_adjusted, na.rm = TRUE),
max_L = ifelse(all(is.na(L_adjusted)), NA_real_, max(L_adjusted, na.rm = TRUE)),
sd_L = sd(L_adjusted, na.rm = TRUE)
)
fwrite(df_outside_2SD_K, file.path(out_dir_qc, "max_observed_L_vals_for_spots_outside_2SD_K.csv"))
p_raw_before <- ggplot(df_cleaned,
aes(x = L_adjusted, y = K_adjusted, color = as.factor(conc_num))) +
geom_point() +
ggtitle("Raw L vs K before QC") +
theme_publication(legend_position = "right")
pdf(file.path(out_dir_qc, "raw_L_vs_K_before_QC.pdf"))
print(p_raw_before)
dev.off()
saveWidget(ggplotly(p_raw_before), file.path(out_dir_qc, "raw_L_vs_K_before_QC.html"))
p_above_threshold <- ggplot(df_cleaned %>% filter(delta_bg > delta_bg_tolerance),
aes(x = L_adjusted, y = K_adjusted, color = as.factor(conc_num))) +
geom_point() +
ggtitle("Raw L vs K for strains above delta background threshold") +
theme_publication(legend_position = "right")
pdf(file.path(out_dir_qc, "raw_L_vs_K_for_strains_above_delta_bg_threshold.pdf"))
print(p_above_threshold)
dev.off()
saveWidget(ggplotly(p_above_threshold), file.path(out_dir_qc, "raw_L_vs_K_for_strains_above_delta_bg_threshold.html"))
}
compute_rf_interaction_scores <- function(df_rf, output_dir) {
lm_sd_L <- sd(df_rf$lm_Score_L, na.rm = TRUE)
lm_sd_K <- sd(df_rf$lm_Score_K, na.rm = TRUE)
lm_sd_r <- sd(df_rf$lm_Score_r, na.rm = TRUE)
lm_sd_AUC <- sd(df_rf$lm_Score_AUC, na.rm = TRUE)
lm_mean_L <- mean(df_rf$lm_Score_L, na.rm = TRUE)
lm_mean_K <- mean(df_rf$lm_Score_K, na.rm = TRUE)
lm_mean_r <- mean(df_rf$lm_Score_r, na.rm = TRUE)
lm_mean_AUC <- mean(df_rf$lm_Score_AUC, na.rm = TRUE)
df_rf <- df_rf %>%
mutate(
Z_lm_L = (lm_Score_L - lm_mean_L) / lm_sd_L,
Z_lm_K = (lm_Score_K - lm_mean_K) / lm_sd_K,
Z_lm_r = (lm_Score_r - lm_mean_r) / lm_sd_r,
Z_lm_AUC = (lm_Score_AUC - lm_mean_AUC) / lm_sd_AUC
)
df_rf <- df_rf %>%
arrange(desc(Z_lm_L), desc(NG))
fwrite(df_rf, file.path(output_dir, "rf_zscores_interaction.csv"), row.names = FALSE)
return(df_rf)
}
create_rank_plots <- function(interaction_scores_ranks, out_dir) {
rank_vars <- c("l_rank", "k_rank", "r_rank", "auc_rank")
lapply(rank_vars, function(rank_var) {
p <- ggplot(interaction_scores, aes(x = !!sym(rank_var))) +
p <- ggplot(interaction_scores_ranks, aes(x = !!sym(rank_var))) +
geom_bar() +
ggtitle(paste("Rank Distribution for", rank_var)) +
theme_publication()
@@ -347,8 +418,7 @@ create_rank_plots <- function(interaction_scores, out_dir) {
pdf(pdf_path, width = PLOT_WIDTH, height = PLOT_HEIGHT)
print(p)
dev.off()
# Generate HTML output
html_path <- sub(".pdf$", ".html", pdf_path)
pgg <- suppressWarnings(ggplotly(p) %>%
layout(legend = list(orientation = "h")))
@@ -357,11 +427,9 @@ create_rank_plots <- function(interaction_scores, out_dir) {
}
create_correlation_plot <- function(interaction_scores, out_dir) {
# Check for non-finite values and remove them from the dataset
interaction_scores <- interaction_scores %>%
filter_all(all_vars(is.finite(.)))
# Generate correlation plots for each pair of variables
pairs <- list(
c("Raw_Shift_L", "Raw_Shift_K"),
c("Raw_Shift_L", "Raw_Shift_r"),
@@ -370,7 +438,7 @@ create_correlation_plot <- function(interaction_scores, out_dir) {
c("Raw_Shift_K", "Raw_Shift_AUC"),
c("Raw_Shift_r", "Raw_Shift_AUC")
)
lapply(pairs, function(vars) {
p <- ggplot(interaction_scores, aes(x = !!sym(vars[1]), y = !!sym(vars[2]))) +
geom_point() +
@@ -382,8 +450,7 @@ create_correlation_plot <- function(interaction_scores, out_dir) {
pdf(pdf_path, width = PLOT_WIDTH, height = PLOT_HEIGHT)
print(p)
dev.off()
# Generate HTML output
html_path <- sub(".pdf$", ".html", pdf_path)
pgg <- suppressWarnings(ggplotly(p, tooltip = c(vars[1], vars[2])) %>%
layout(legend = list(orientation = "h")))
@@ -397,260 +464,124 @@ process_experiment <- function(exp_name, exp_dir, genes, output_dir) {
dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)
dir.create(out_dir_qc, showWarnings = FALSE)
# Load and preprocess the data
data <- load_and_preprocess_data(args$easy_results_file, genes)
# Calculate delta background tolerance
delta_bg_tolerance <- mean(data$delta_bg, na.rm = TRUE) + 3 * sd(data$delta_bg, na.rm = TRUE)
# Generate and publish QC plots
generate_and_publish_qc(data, out_dir_qc)
# Generate and publish QC plots (both pre-QC and post-QC)
generate_and_publish_qc(data, delta_bg_tolerance, out_dir_qc)
# Calculate and save summary statistics
compute_experiment_summary_stats(data, out_dir)
compute_experiment_summary_stats(data, out_dir, background_strain = "YDL227C")
InteractionScores_RF <- data %>%
select(OrfRepUnique) %>%
distinct() %>%
mutate(
Gene = NA,
Raw_Shift_L = NA, Z_Shift_L = NA, lm_Score_L = NA, Z_lm_L = NA,
R_Squared_L = NA, Sum_Z_Score_L = NA, Avg_Zscore_L = NA,
Raw_Shift_K = NA, Z_Shift_K = NA, lm_Score_K = NA, Z_lm_K = NA,
R_Squared_K = NA, Sum_Z_Score_K = NA, Avg_Zscore_K = NA,
Raw_Shift_r = NA, Z_Shift_r = NA, lm_Score_r = NA, Z_lm_r = NA,
R_Squared_r = NA, Sum_Z_Score_r = NA, Avg_Zscore_r = NA,
Raw_Shift_AUC = NA, Z_Shift_AUC = NA, lm_Score_AUC = NA, Z_lm_AUC = NA,
R_Squared_AUC = NA, Sum_Z_Score_AUC = NA, Avg_Zscore_AUC = NA,
NG = NA, SM = NA
)
for (i in seq_along(unique(InteractionScores_RF$OrfRepUnique))) {
Gene_Sel <- unique(InteractionScores_RF$OrfRepUnique)[i]
X_Gene_Sel <- data[data$OrfRepUnique == Gene_Sel,]
X_stats_interaction <- X_Gene_Sel %>%
group_by(OrfRepUnique, Gene) %>%
summarize(
Raw_Shift_L = mean(L_adjusted, na.rm = TRUE),
Raw_Shift_K = mean(K_adjusted, na.rm = TRUE),
Raw_Shift_r = mean(r_adjusted, na.rm = TRUE),
Raw_Shift_AUC = mean(AUC_adjusted, na.rm = TRUE),
Z_Shift_L = mean(scale(L_adjusted, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
Z_Shift_K = mean(scale(K_adjusted, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
Z_Shift_r = mean(scale(r_adjusted, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
Z_Shift_AUC = mean(scale(AUC_adjusted, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
lm_Score_L = coef(lm(L_adjusted ~ delta_bg, data = X_Gene_Sel))[2],
R_Squared_L = summary(lm(L_adjusted ~ delta_bg, data = X_Gene_Sel))$r.squared,
lm_Score_K = coef(lm(K_adjusted ~ delta_bg, data = X_Gene_Sel))[2],
R_Squared_K = summary(lm(K_adjusted ~ delta_bg, data = X_Gene_Sel))$r.squared,
lm_Score_r = coef(lm(r_adjusted ~ delta_bg, data = X_Gene_Sel))[2],
R_Squared_r = summary(lm(r_adjusted ~ delta_bg, data = X_Gene_Sel))$r.squared,
lm_Score_AUC = coef(lm(AUC_adjusted ~ delta_bg, data = X_Gene_Sel))[2],
R_Squared_AUC = summary(lm(AUC_adjusted ~ delta_bg, data = X_Gene_Sel))$r.squared,
NG = sum(NG, na.rm = TRUE),
DB = sum(DB, na.rm = TRUE),
SM = sum(SM, na.rm = TRUE)
)
InteractionScores_RF <- InteractionScores_RF %>%
mutate(
Gene = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$Gene[1], Gene),
Raw_Shift_L = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$Raw_Shift_L[1], Raw_Shift_L),
Z_Shift_L = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$Z_Shift_L[1], Z_Shift_L),
lm_Score_L = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$lm_Score_L[1], lm_Score_L),
R_Squared_L = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$R_Squared_L[1], R_Squared_L),
Sum_Z_Score_L = ifelse(OrfRepUnique == Gene_Sel, sum(X_stats_interaction$Z_Shift_L, na.rm = TRUE), Sum_Z_Score_L),
Avg_Zscore_L = ifelse(OrfRepUnique == Gene_Sel, mean(X_stats_interaction$Z_Shift_L, na.rm = TRUE), Avg_Zscore_L),
Raw_Shift_K = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$Raw_Shift_K[1], Raw_Shift_K),
Z_Shift_K = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$Z_Shift_K[1], Z_Shift_K),
lm_Score_K = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$lm_Score_K[1], lm_Score_K),
R_Squared_K = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$R_Squared_K[1], R_Squared_K),
Sum_Z_Score_K = ifelse(OrfRepUnique == Gene_Sel, sum(X_stats_interaction$Z_Shift_K, na.rm = TRUE), Sum_Z_Score_K),
Avg_Zscore_K = ifelse(OrfRepUnique == Gene_Sel, mean(X_stats_interaction$Z_Shift_K, na.rm = TRUE), Avg_Zscore_K),
Raw_Shift_r = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$Raw_Shift_r[1], Raw_Shift_r),
Z_Shift_r = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$Z_Shift_r[1], Z_Shift_r),
lm_Score_r = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$lm_Score_r[1], lm_Score_r),
R_Squared_r = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$R_Squared_r[1], R_Squared_r),
Sum_Z_Score_r = ifelse(OrfRepUnique == Gene_Sel, sum(X_stats_interaction$Z_Shift_r, na.rm = TRUE), Sum_Z_Score_r),
Avg_Zscore_r = ifelse(OrfRepUnique == Gene_Sel, mean(X_stats_interaction$Z_Shift_r, na.rm = TRUE), Avg_Zscore_r),
Raw_Shift_AUC = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$Raw_Shift_AUC[1], Raw_Shift_AUC),
Z_Shift_AUC = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$Z_Shift_AUC[1], Z_Shift_AUC),
lm_Score_AUC = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$lm_Score_AUC[1], lm_Score_AUC),
R_Squared_AUC = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$R_Squared_AUC[1], R_Squared_AUC),
Sum_Z_Score_AUC = ifelse(OrfRepUnique == Gene_Sel, sum(X_stats_interaction$Z_Shift_AUC, na.rm = TRUE), Sum_Z_Score_AUC),
Avg_Zscore_AUC = ifelse(OrfRepUnique == Gene_Sel, mean(X_stats_interaction$Z_Shift_AUC, na.rm = TRUE), Avg_Zscore_AUC),
NG = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$NG[1], NG),
DB = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$DB[1], DB),
SM = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$SM[1], SM)
)
}
fwrite(InteractionScores_RF, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE)
generate_and_publish_qc(data, out_dir_qc)
filter_and_calculate_sd_k_stats(data, out_dir_qc)
# Process and publish summary stats, interaction scores, and z-scores
variables <- c("L", "K", "r", "AUC", "delta_bg")
publish_summary_stats(data, variables, out_dir)
interaction_scores <- publish_interaction_scores(data, out_dir)
publish_zscores(interaction_scores, out_dir) # Now writing interaction_scores, not original data
compute_experiment_summary_stats(data, variables, out_dir)
compute_experiment_summary_stats(data, variables, out_dir, background_strain = "YDL227C")
# Generate rank plots and correlation plots
create_rank_plots(interaction_scores, out_dir)
create_correlation_plot(interaction_scores, out_dir)
compute_rf_interaction_scores(InteractionScores_RF, out_dir)
publish_scores(InteractionScores_RF, out_dir)
create_rank_plots(InteractionScores_RF, out_dir)
create_correlation_plot(InteractionScores_RF, out_dir)
output_file <- file.path(out_dir, "zscores_interaction.csv")
fwrite(interaction_scores, output_file, row.names = FALSE) # Write interaction_scores here
fwrite(InteractionScores_RF, output_file, row.names = FALSE)
return(output_file)
}
# Process all experiments
processed_files <- lapply(names(args$experiments), function(exp_name) {
process_experiment(exp_name, args$experiments[[exp_name]], genes, args$out_dir)
})
# Combine results from all experiments if multiple experiments exist
if (length(processed_files) > 1) {
combined_data <- Reduce(function(x, y) {
merge(fread(x), fread(y), by = "OrfRep", all = TRUE, allow.cartesian = TRUE)
merge(fread(x), fread(y), by = "OrfRepUnique", all = TRUE)
}, processed_files)
combined_output_file <- file.path(args$out_dir, "zscores", "zscores_interaction_combined.csv")
fwrite(combined_data, combined_output_file, row.names = FALSE)
combined_output_file <- file.path(args$out_dir, "zscores", "zscores_interaction_combined.csv")
fwrite(combined_data, combined_output_file, row.names = FALSE)
}
# publish_interaction_scores <- function(df, out_dir) {
# interaction_scores <- df %>%
# dplyr::group_by(OrfRep) %>%
# dplyr::summarize(
# mean_L = mean(L, na.rm = TRUE),
# mean_K = mean(K, na.rm = TRUE),
# mean_r = mean(r, na.rm = TRUE),
# mean_AUC = mean(AUC, na.rm = TRUE),
# delta_bg_mean = mean(delta_bg, na.rm = TRUE),
# delta_bg_sd = sd(delta_bg, na.rm = TRUE)
# ) %>%
# dplyr::mutate(
# l_rank = rank(mean_L),
# k_rank = rank(mean_K),
# r_rank = rank(mean_r),
# auc_rank = rank(mean_AUC)
# )
# fwrite(interaction_scores, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE)
# fwrite(dplyr::arrange(interaction_scores, l_rank, k_rank),
# file.path(out_dir, "rf_zscores_interaction_ranked.csv"), row.names = FALSE)
# # Additional enhancer and suppressor calculations and outputs
# deletion_enhancers_L <- interaction_scores[interaction_scores$mean_L >= 2, ]
# deletion_enhancers_L <- deletion_enhancers_L[!is.na(deletion_enhancers_L$OrfRep), ]
# deletion_enhancers_K <- interaction_scores[interaction_scores$mean_K <= -2, ]
# deletion_enhancers_K <- deletion_enhancers_K[!is.na(deletion_enhancers_K$OrfRep), ]
# deletion_suppressors_L <- interaction_scores[interaction_scores$mean_L <= -2, ]
# deletion_suppressors_L <- deletion_suppressors_L[!is.na(deletion_suppressors_L$OrfRep), ]
# deletion_suppressors_K <- interaction_scores[interaction_scores$mean_K >= 2, ]
# deletion_suppressors_K <- deletion_suppressors_K[!is.na(deletion_suppressors_K$OrfRep), ]
# deletion_enhancers_and_suppressors_L <- interaction_scores[
# interaction_scores$mean_L >= 2 | interaction_scores$mean_L <= -2, ]
# deletion_enhancers_and_suppressors_L <- deletion_enhancers_and_suppressors_L[
# !is.na(deletion_enhancers_and_suppressors_L$OrfRep), ]
# deletion_enhancers_and_suppressors_K <- interaction_scores[
# interaction_scores$mean_K >= 2 | interaction_scores$mean_K <= -2, ]
# deletion_enhancers_and_suppressors_K <- deletion_enhancers_and_suppressors_K[
# !is.na(deletion_enhancers_and_suppressors_K$OrfRep), ]
# # Write CSV files with updated filenames
# fwrite(deletion_enhancers_L, file.path(out_dir, "deletion_enhancers_l.csv"), row.names = FALSE)
# fwrite(deletion_enhancers_K, file.path(out_dir, "deletion_enhancers_k.csv"), row.names = FALSE)
# fwrite(deletion_suppressors_L, file.path(out_dir, "deletion_suppressors_l.csv"), row.names = FALSE)
# fwrite(deletion_suppressors_K, file.path(out_dir, "deletion_suppressors_k.csv"), row.names = FALSE)
# fwrite(deletion_enhancers_and_suppressors_L, file.path(out_dir, "deletion_enhancers_and_suppressors_l.csv"), row.names = FALSE)
# fwrite(deletion_enhancers_and_suppressors_K, file.path(out_dir, "deletion_enhancers_and_suppressors_k.csv"), row.names = FALSE)
# return(interaction_scores) # Return the updated data frame with rank columns
# }
# publish_zscores <- function(df, out_dir) {
# zscores <- df %>%
# dplyr::mutate(
# zscore_L = scale(L, center = TRUE, scale = TRUE),
# zscore_K = scale(K, center = TRUE, scale = TRUE),
# zscore_r = scale(r, center = TRUE, scale = TRUE),
# zscore_AUC = scale(AUC, center = TRUE, scale = TRUE)
# )
# fwrite(zscores, file.path(out_dir, "zscores_interaction.csv"), row.names = FALSE)
# }
# generate_and_publish_qc <- function(df, delta_bg_tolerance, out_dir_qc) {
# variables <- c("L", "K", "r", "AUC", "delta_bg")
# # Pre-QC plots
# lapply(variables, function(var) {
# if (var %in% colnames(df)) {
# create_and_publish_plot(df, var, "scatter", out_dir_qc)
# }
# })
# # Filter data based on delta background tolerance for Post-QC analysis
# df_post_qc <- df %>%
# mutate(across(all_of(variables),
# ~ ifelse(delta_bg >= delta_bg_tolerance, NA, .)))
# # Post-QC plots
# lapply(variables, function(var) {
# if (var %in% colnames(df_post_qc)) {
# create_and_publish_plot(df_post_qc, var, "scatter", out_dir_qc, suffix = "_after_qc")
# }
# })
# # For plots specifically for data above the tolerance threshold
# delta_bg_above_tolerance <- df[df$delta_bg >= delta_bg_tolerance, ]
# lapply(variables, function(var) {
# if (var %in% colnames(delta_bg_above_tolerance)) {
# create_and_publish_plot(delta_bg_above_tolerance, var, "scatter", out_dir_qc, suffix = "_above_tolerance")
# }
# })
# }
# # Create rank plots
# create_rank_plots <- function(interaction_scores, out_dir) {
# rank_vars <- c("l_rank", "k_rank", "r_rank", "auc_rank")
# lapply(rank_vars, function(rank_var) {
# p <- ggplot(interaction_scores, aes(x = !!sym(rank_var))) +
# geom_bar() +
# ggtitle(paste("Rank Distribution for", rank_var)) +
# theme_publication()
# pdf_path <- file.path(out_dir, paste0("rank_distribution_", rank_var, ".pdf"))
# pdf(pdf_path, width = PLOT_WIDTH, height = PLOT_HEIGHT)
# print(p)
# dev.off()
# # Generate HTML output
# html_path <- sub(".pdf$", ".html", pdf_path)
# pgg <- suppressWarnings(ggplotly(p) %>%
# layout(legend = list(orientation = "h")))
# saveWidget(pgg, html_path, selfcontained = TRUE)
# })
# }
# create_correlation_plot <- function(interaction_scores, out_dir) {
# # Check for non-finite values and remove them from the dataset
# interaction_scores <- interaction_scores %>%
# filter_all(all_vars(is.finite(.)))
# # Generate correlation plots for each pair of variables
# pairs <- list(
# c("mean_L", "mean_K"),
# c("mean_L", "mean_r"),
# c("mean_L", "mean_AUC"),
# c("mean_K", "mean_r"),
# c("mean_K", "mean_AUC"),
# c("mean_r", "mean_AUC")
# )
# lapply(pairs, function(vars) {
# p <- ggplot(interaction_scores, aes(x = !!sym(vars[1]), y = !!sym(vars[2]))) +
# geom_point() +
# geom_smooth(method = "lm", se = FALSE) +
# ggtitle(paste("Correlation between", vars[1], "and", vars[2])) +
# theme_publication()
# pdf_path <- file.path(out_dir, paste0("correlation_", vars[1], "_", vars[2], ".pdf"))
# pdf(pdf_path, width = PLOT_WIDTH, height = PLOT_HEIGHT)
# print(p)
# dev.off()
# # Generate HTML output
# html_path <- sub(".pdf$", ".html", pdf_path)
# pgg <- suppressWarnings(ggplotly(p, tooltip = c(vars[1], vars[2])) %>%
# layout(legend = list(orientation = "h")))
# saveWidget(pgg, html_path, selfcontained = TRUE)
# })
# }
# process_experiment <- function(exp_name, exp_dir, genes, output_dir) {
# out_dir <- file.path(exp_dir, "zscores")
# out_dir_qc <- file.path(out_dir, "qc")
# dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)
# dir.create(out_dir_qc, showWarnings = FALSE)
# # Load and preprocess the data
# data <- load_and_preprocess_data(args$easy_results_file, genes)
# # Calculate delta background tolerance
# delta_bg_tolerance <- mean(data$delta_bg, na.rm = TRUE) + 3 * sd(data$delta_bg, na.rm = TRUE)
# # Generate and publish QC plots (both pre-QC and post-QC)
# generate_and_publish_qc(data, delta_bg_tolerance, out_dir_qc)
# # Process and publish summary stats, interaction scores, and z-scores
# variables <- c("L", "K", "r", "AUC", "delta_bg")
# publish_summary_stats(data, variables, out_dir)
# interaction_scores <- publish_interaction_scores(data, out_dir)
# publish_zscores(data, out_dir)
# # Generate rank plots and correlation plots
# create_rank_plots(interaction_scores, out_dir)
# create_correlation_plot(interaction_scores, out_dir)
# output_file <- file.path(out_dir, "zscores_interaction.csv")
# fwrite(data, output_file, row.names = FALSE)
# return(output_file)
# }
# # Process all experiments
# processed_files <- lapply(names(args$experiments), function(exp_name) {
# process_experiment(exp_name, args$experiments[[exp_name]], genes, args$out_dir)
# })
# # Combine results from all experiments if multiple experiments exist
# if (length(processed_files) > 1) {
# combined_data <- Reduce(function(x, y) {
# merge(fread(x), fread(y), by = "OrfRep", all = TRUE, allow.cartesian = TRUE)
# }, processed_files)
# combined_output_file <- file.path(args$out_dir, "zscores", "zscores_interaction_combined.csv")
# fwrite(combined_data, combined_output_file, row.names = FALSE)
# }