Output interaction df in calculate_interaction_zscores.R

This commit is contained in:
2024-08-24 02:30:39 -04:00
parent 3cb6e89360
commit 17521b817f

View File

@@ -100,7 +100,7 @@ scale_colour_publication <- function(...) {
# Load SGD gene list # Load SGD gene list
sgd_genes <- function(sgd_gene_list) { sgd_genes <- function(sgd_gene_list) {
read.delim(file = sgd_gene_list, quote = "", header = FALSE, read.delim(file = sgd_gene_list, quote = "", header = FALSE,
colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11))) %>% colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11))) %>%
dplyr::rename(ORF = V4, GeneName = V5) dplyr::rename(ORF = V4, GeneName = V5)
} }
@@ -123,7 +123,6 @@ load_and_preprocess_data <- function(easy_results_file, genes) {
mutate(L = if ("l" %in% colnames(.)) l else {warning("Missing column: l"); NA}) %>% mutate(L = if ("l" %in% colnames(.)) l else {warning("Missing column: l"); NA}) %>%
mutate(AUC = if ("AUC96" %in% colnames(.)) AUC96 else {warning("Missing column: AUC96"); NA}) %>% mutate(AUC = if ("AUC96" %in% colnames(.)) AUC96 else {warning("Missing column: AUC96"); NA}) %>%
filter(!.[[1]] %in% c("", "Scan")) %>% # Filter out empty or Scan rows filter(!.[[1]] %in% c("", "Scan")) %>% # Filter out empty or Scan rows
# {if ("Conc" %in% colnames(.)) filter(., Conc != "0ug/mL") else .} %>%
filter(Gene != "BLANK" & Gene != "Blank" & ORF != "Blank" & Gene != "blank") %>% # Remove blank genes filter(Gene != "BLANK" & Gene != "Blank" & ORF != "Blank" & Gene != "blank") %>% # Remove blank genes
filter(Drug != "BMH21") %>% # Filter out specific drugs if necessary filter(Drug != "BMH21") %>% # Filter out specific drugs if necessary
filter(!is.na(ORF) & ORF != "") %>% # Ensure ORF is not NA or empty filter(!is.na(ORF) & ORF != "") %>% # Ensure ORF is not NA or empty
@@ -142,8 +141,6 @@ load_and_preprocess_data <- function(easy_results_file, genes) {
return(df) return(df)
} }
create_and_publish_plot <- function(df, var, plot_type, out_dir_qc, suffix = "") { create_and_publish_plot <- function(df, var, plot_type, out_dir_qc, suffix = "") {
if (!("Scan" %in% colnames(df))) { if (!("Scan" %in% colnames(df))) {
warning("'Scan' column is not present in the data. Skipping this plot.") warning("'Scan' column is not present in the data. Skipping this plot.")
@@ -186,52 +183,103 @@ publish_summary_stats <- function(df, variables, out_dir) {
fwrite(summary_stats_df, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE) fwrite(summary_stats_df, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
} }
publish_interaction_scores <- function(df, out_dir) { # Compute Interaction Scores
interaction_scores <- df %>% compute_interaction_scores <- function(df) {
dplyr::group_by(OrfRep) %>% # Calculate raw shifts
dplyr::summarize( raw_shifts <- df %>%
mean_L = mean(L, na.rm = TRUE), group_by(OrfRep) %>%
mean_K = mean(K, na.rm = TRUE), reframe(
mean_r = mean(r, na.rm = TRUE), Raw_Shift_L = mean(L, na.rm = TRUE),
mean_AUC = mean(AUC, na.rm = TRUE), Raw_Shift_K = mean(K, na.rm = TRUE),
delta_bg_mean = mean(delta_bg, na.rm = TRUE), Raw_Shift_r = mean(r, na.rm = TRUE),
delta_bg_sd = sd(delta_bg, na.rm = TRUE) Raw_Shift_AUC = mean(AUC, na.rm = TRUE),
) %>% NG = n()
dplyr::mutate(
l_rank = rank(mean_L),
k_rank = rank(mean_K),
r_rank = rank(mean_r),
auc_rank = rank(mean_AUC)
) )
# Calculate Z-scores
z_scores <- df %>%
group_by(OrfRep) %>%
reframe(
Z_Shift_L = mean(scale(L, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
Z_Shift_K = mean(scale(K, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
Z_Shift_r = mean(scale(r, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
Z_Shift_AUC = mean(scale(AUC, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE)
)
# Linear Model Scores and R-Squared
lm_scores <- df %>%
group_by(OrfRep) %>%
reframe(
lm_Score_L = coef(lm(L ~ delta_bg, data = .))[2],
R_Squared_L = summary(lm(L ~ delta_bg, data = .))$r.squared,
lm_Score_K = coef(lm(K ~ delta_bg, data = .))[2],
R_Squared_K = summary(lm(K ~ delta_bg, data = .))$r.squared,
lm_Score_r = coef(lm(r ~ delta_bg, data = .))[2],
R_Squared_r = summary(lm(r ~ delta_bg, data = .))$r.squared,
lm_Score_AUC = coef(lm(AUC ~ delta_bg, data = .))[2],
R_Squared_AUC = summary(lm(AUC ~ delta_bg, data = .))$r.squared
)
# Calculate Sum and Average Z-scores
sum_avg_z_scores <- df %>%
group_by(OrfRep) %>%
reframe(
Sum_Z_Score_L = sum(scale(L, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
Avg_Zscore_L = mean(scale(L, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
Sum_Z_Score_K = sum(scale(K, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
Avg_Zscore_K = mean(scale(K, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
Sum_Z_Score_r = sum(scale(r, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
Avg_Zscore_r = mean(scale(r, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
Sum_Z_Score_AUC = sum(scale(AUC, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
Avg_Zscore_AUC = mean(scale(AUC, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE)
)
# Combine all calculations
interaction_scores <- raw_shifts %>%
left_join(z_scores, by = "OrfRep") %>%
left_join(lm_scores, by = "OrfRep") %>%
left_join(sum_avg_z_scores, by = "OrfRep") %>%
mutate(
l_rank = rank(-Raw_Shift_L),
k_rank = rank(-Raw_Shift_K),
r_rank = rank(-Raw_Shift_r),
auc_rank = rank(-Raw_Shift_AUC)
)
return(interaction_scores)
}
fwrite(interaction_scores, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE)
fwrite(dplyr::arrange(interaction_scores, l_rank, k_rank),
file.path(out_dir, "rf_zscores_interaction_ranked.csv"), row.names = FALSE)
publish_interaction_scores <- function(df, out_dir) {
interaction_scores <- compute_interaction_scores(df)
# Additional enhancer and suppressor calculations and outputs # Additional enhancer and suppressor calculations and outputs
deletion_enhancers_L <- interaction_scores[interaction_scores$mean_L >= 2, ] deletion_enhancers_L <- interaction_scores[interaction_scores$Raw_Shift_L >= 2, ]
deletion_enhancers_L <- deletion_enhancers_L[!is.na(deletion_enhancers_L$OrfRep), ] deletion_enhancers_L <- deletion_enhancers_L[!is.na(deletion_enhancers_L$OrfRep), ]
deletion_enhancers_K <- interaction_scores[interaction_scores$mean_K <= -2, ] deletion_enhancers_K <- interaction_scores[interaction_scores$Raw_Shift_K <= -2, ]
deletion_enhancers_K <- deletion_enhancers_K[!is.na(deletion_enhancers_K$OrfRep), ] deletion_enhancers_K <- deletion_enhancers_K[!is.na(deletion_enhancers_K$OrfRep), ]
deletion_suppressors_L <- interaction_scores[interaction_scores$mean_L <= -2, ] deletion_suppressors_L <- interaction_scores[interaction_scores$Raw_Shift_L <= -2, ]
deletion_suppressors_L <- deletion_suppressors_L[!is.na(deletion_suppressors_L$OrfRep), ] deletion_suppressors_L <- deletion_suppressors_L[!is.na(deletion_suppressors_L$OrfRep), ]
deletion_suppressors_K <- interaction_scores[interaction_scores$mean_K >= 2, ] deletion_suppressors_K <- interaction_scores[interaction_scores$Raw_Shift_K >= 2, ]
deletion_suppressors_K <- deletion_suppressors_K[!is.na(deletion_suppressors_K$OrfRep), ] deletion_suppressors_K <- deletion_suppressors_K[!is.na(deletion_suppressors_K$OrfRep), ]
deletion_enhancers_and_suppressors_L <- interaction_scores[ deletion_enhancers_and_suppressors_L <- interaction_scores[
interaction_scores$mean_L >= 2 | interaction_scores$mean_L <= -2, ] interaction_scores$Raw_Shift_L >= 2 | interaction_scores$Raw_Shift_L <= -2, ]
deletion_enhancers_and_suppressors_L <- deletion_enhancers_and_suppressors_L[ deletion_enhancers_and_suppressors_L <- deletion_enhancers_and_suppressors_L[
!is.na(deletion_enhancers_and_suppressors_L$OrfRep), ] !is.na(deletion_enhancers_and_suppressors_L$OrfRep), ]
deletion_enhancers_and_suppressors_K <- interaction_scores[ deletion_enhancers_and_suppressors_K <- interaction_scores[
interaction_scores$mean_K >= 2 | interaction_scores$mean_K <= -2, ] interaction_scores$Raw_Shift_K >= 2 | interaction_scores$Raw_Shift_K <= -2, ]
deletion_enhancers_and_suppressors_K <- deletion_enhancers_and_suppressors_K[ deletion_enhancers_and_suppressors_K <- deletion_enhancers_and_suppressors_K[
!is.na(deletion_enhancers_and_suppressors_K$OrfRep), ] !is.na(deletion_enhancers_and_suppressors_K$OrfRep), ]
# Write CSV files with updated filenames # Write CSV files with updated filenames
fwrite(interaction_scores, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE)
fwrite(dplyr::arrange(interaction_scores, l_rank, k_rank),
file.path(out_dir, "rf_zscores_interaction_ranked.csv"), row.names = FALSE)
fwrite(deletion_enhancers_L, file.path(out_dir, "deletion_enhancers_l.csv"), row.names = FALSE) fwrite(deletion_enhancers_L, file.path(out_dir, "deletion_enhancers_l.csv"), row.names = FALSE)
fwrite(deletion_enhancers_K, file.path(out_dir, "deletion_enhancers_k.csv"), row.names = FALSE) fwrite(deletion_enhancers_K, file.path(out_dir, "deletion_enhancers_k.csv"), row.names = FALSE)
fwrite(deletion_suppressors_L, file.path(out_dir, "deletion_suppressors_l.csv"), row.names = FALSE) fwrite(deletion_suppressors_L, file.path(out_dir, "deletion_suppressors_l.csv"), row.names = FALSE)
@@ -239,23 +287,22 @@ publish_interaction_scores <- function(df, out_dir) {
fwrite(deletion_enhancers_and_suppressors_L, file.path(out_dir, "deletion_enhancers_and_suppressors_l.csv"), row.names = FALSE) fwrite(deletion_enhancers_and_suppressors_L, file.path(out_dir, "deletion_enhancers_and_suppressors_l.csv"), row.names = FALSE)
fwrite(deletion_enhancers_and_suppressors_K, file.path(out_dir, "deletion_enhancers_and_suppressors_k.csv"), row.names = FALSE) fwrite(deletion_enhancers_and_suppressors_K, file.path(out_dir, "deletion_enhancers_and_suppressors_k.csv"), row.names = FALSE)
return(interaction_scores) # Return the updated data frame with rank columns return(interaction_scores) # Return the updated data frame with all calculated columns
} }
publish_zscores <- function(df, out_dir) { publish_zscores <- function(df, out_dir) {
zscores <- df %>% zscores <- df %>%
dplyr::mutate( dplyr::mutate(
zscore_L = scale(L, center = TRUE, scale = TRUE), zscore_L = scale(Raw_Shift_L, center = TRUE, scale = TRUE),
zscore_K = scale(K, center = TRUE, scale = TRUE), zscore_K = scale(Raw_Shift_K, center = TRUE, scale = TRUE),
zscore_r = scale(r, center = TRUE, scale = TRUE), zscore_r = scale(Raw_Shift_r, center = TRUE, scale = TRUE),
zscore_AUC = scale(AUC, center = TRUE, scale = TRUE) zscore_AUC = scale(Raw_Shift_AUC, center = TRUE, scale = TRUE)
) )
fwrite(zscores, file.path(out_dir, "zscores_interaction.csv"), row.names = FALSE) fwrite(zscores, file.path(out_dir, "zscores_interaction.csv"), row.names = FALSE)
} }
generate_and_publish_qc <- function(df, delta_bg_tolerance, out_dir_qc) { generate_and_publish_qc <- function(df, delta_bg_tolerance, out_dir_qc) {
variables <- c("L", "K", "r", "AUC", "delta_bg") variables <- c("L", "K", "r", "AUC", "delta_bg")
@@ -268,7 +315,7 @@ generate_and_publish_qc <- function(df, delta_bg_tolerance, out_dir_qc) {
# Filter data based on delta background tolerance for Post-QC analysis # Filter data based on delta background tolerance for Post-QC analysis
df_post_qc <- df %>% df_post_qc <- df %>%
mutate(across(all_of(variables), mutate(across(all_of(variables),
~ ifelse(delta_bg >= delta_bg_tolerance, NA, .))) ~ ifelse(delta_bg >= delta_bg_tolerance, NA, .)))
# Post-QC plots # Post-QC plots
@@ -287,9 +334,6 @@ generate_and_publish_qc <- function(df, delta_bg_tolerance, out_dir_qc) {
}) })
} }
# Create rank plots
create_rank_plots <- function(interaction_scores, out_dir) { create_rank_plots <- function(interaction_scores, out_dir) {
rank_vars <- c("l_rank", "k_rank", "r_rank", "auc_rank") rank_vars <- c("l_rank", "k_rank", "r_rank", "auc_rank")
@@ -319,12 +363,12 @@ create_correlation_plot <- function(interaction_scores, out_dir) {
# Generate correlation plots for each pair of variables # Generate correlation plots for each pair of variables
pairs <- list( pairs <- list(
c("mean_L", "mean_K"), c("Raw_Shift_L", "Raw_Shift_K"),
c("mean_L", "mean_r"), c("Raw_Shift_L", "Raw_Shift_r"),
c("mean_L", "mean_AUC"), c("Raw_Shift_L", "Raw_Shift_AUC"),
c("mean_K", "mean_r"), c("Raw_Shift_K", "Raw_Shift_r"),
c("mean_K", "mean_AUC"), c("Raw_Shift_K", "Raw_Shift_AUC"),
c("mean_r", "mean_AUC") c("Raw_Shift_r", "Raw_Shift_AUC")
) )
lapply(pairs, function(vars) { lapply(pairs, function(vars) {
@@ -347,7 +391,6 @@ create_correlation_plot <- function(interaction_scores, out_dir) {
}) })
} }
process_experiment <- function(exp_name, exp_dir, genes, output_dir) { process_experiment <- function(exp_name, exp_dir, genes, output_dir) {
out_dir <- file.path(exp_dir, "zscores") out_dir <- file.path(exp_dir, "zscores")
out_dir_qc <- file.path(out_dir, "qc") out_dir_qc <- file.path(out_dir, "qc")
@@ -367,14 +410,14 @@ process_experiment <- function(exp_name, exp_dir, genes, output_dir) {
variables <- c("L", "K", "r", "AUC", "delta_bg") variables <- c("L", "K", "r", "AUC", "delta_bg")
publish_summary_stats(data, variables, out_dir) publish_summary_stats(data, variables, out_dir)
interaction_scores <- publish_interaction_scores(data, out_dir) interaction_scores <- publish_interaction_scores(data, out_dir)
publish_zscores(data, out_dir) publish_zscores(interaction_scores, out_dir) # Now writing interaction_scores, not original data
# Generate rank plots and correlation plots # Generate rank plots and correlation plots
create_rank_plots(interaction_scores, out_dir) create_rank_plots(interaction_scores, out_dir)
create_correlation_plot(interaction_scores, out_dir) create_correlation_plot(interaction_scores, out_dir)
output_file <- file.path(out_dir, "zscores_interaction.csv") output_file <- file.path(out_dir, "zscores_interaction.csv")
fwrite(data, output_file, row.names = FALSE) fwrite(interaction_scores, output_file, row.names = FALSE) # Write interaction_scores here
return(output_file) return(output_file)
} }
@@ -390,6 +433,224 @@ if (length(processed_files) > 1) {
merge(fread(x), fread(y), by = "OrfRep", all = TRUE, allow.cartesian = TRUE) merge(fread(x), fread(y), by = "OrfRep", all = TRUE, allow.cartesian = TRUE)
}, processed_files) }, processed_files)
combined_output_file <- file.path(args$out_dir, "zscores", "zscores_interaction_combined.csv") combined_output_file <- file.path(args$out_dir, "zscores", "zscores_interaction_combined.csv")
fwrite(combined_data, combined_output_file, row.names = FALSE) fwrite(combined_data, combined_output_file, row.names = FALSE)
} }
# publish_interaction_scores <- function(df, out_dir) {
# interaction_scores <- df %>%
# dplyr::group_by(OrfRep) %>%
# dplyr::summarize(
# mean_L = mean(L, na.rm = TRUE),
# mean_K = mean(K, na.rm = TRUE),
# mean_r = mean(r, na.rm = TRUE),
# mean_AUC = mean(AUC, na.rm = TRUE),
# delta_bg_mean = mean(delta_bg, na.rm = TRUE),
# delta_bg_sd = sd(delta_bg, na.rm = TRUE)
# ) %>%
# dplyr::mutate(
# l_rank = rank(mean_L),
# k_rank = rank(mean_K),
# r_rank = rank(mean_r),
# auc_rank = rank(mean_AUC)
# )
# fwrite(interaction_scores, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE)
# fwrite(dplyr::arrange(interaction_scores, l_rank, k_rank),
# file.path(out_dir, "rf_zscores_interaction_ranked.csv"), row.names = FALSE)
# # Additional enhancer and suppressor calculations and outputs
# deletion_enhancers_L <- interaction_scores[interaction_scores$mean_L >= 2, ]
# deletion_enhancers_L <- deletion_enhancers_L[!is.na(deletion_enhancers_L$OrfRep), ]
# deletion_enhancers_K <- interaction_scores[interaction_scores$mean_K <= -2, ]
# deletion_enhancers_K <- deletion_enhancers_K[!is.na(deletion_enhancers_K$OrfRep), ]
# deletion_suppressors_L <- interaction_scores[interaction_scores$mean_L <= -2, ]
# deletion_suppressors_L <- deletion_suppressors_L[!is.na(deletion_suppressors_L$OrfRep), ]
# deletion_suppressors_K <- interaction_scores[interaction_scores$mean_K >= 2, ]
# deletion_suppressors_K <- deletion_suppressors_K[!is.na(deletion_suppressors_K$OrfRep), ]
# deletion_enhancers_and_suppressors_L <- interaction_scores[
# interaction_scores$mean_L >= 2 | interaction_scores$mean_L <= -2, ]
# deletion_enhancers_and_suppressors_L <- deletion_enhancers_and_suppressors_L[
# !is.na(deletion_enhancers_and_suppressors_L$OrfRep), ]
# deletion_enhancers_and_suppressors_K <- interaction_scores[
# interaction_scores$mean_K >= 2 | interaction_scores$mean_K <= -2, ]
# deletion_enhancers_and_suppressors_K <- deletion_enhancers_and_suppressors_K[
# !is.na(deletion_enhancers_and_suppressors_K$OrfRep), ]
# # Write CSV files with updated filenames
# fwrite(deletion_enhancers_L, file.path(out_dir, "deletion_enhancers_l.csv"), row.names = FALSE)
# fwrite(deletion_enhancers_K, file.path(out_dir, "deletion_enhancers_k.csv"), row.names = FALSE)
# fwrite(deletion_suppressors_L, file.path(out_dir, "deletion_suppressors_l.csv"), row.names = FALSE)
# fwrite(deletion_suppressors_K, file.path(out_dir, "deletion_suppressors_k.csv"), row.names = FALSE)
# fwrite(deletion_enhancers_and_suppressors_L, file.path(out_dir, "deletion_enhancers_and_suppressors_l.csv"), row.names = FALSE)
# fwrite(deletion_enhancers_and_suppressors_K, file.path(out_dir, "deletion_enhancers_and_suppressors_k.csv"), row.names = FALSE)
# return(interaction_scores) # Return the updated data frame with rank columns
# }
# publish_zscores <- function(df, out_dir) {
# zscores <- df %>%
# dplyr::mutate(
# zscore_L = scale(L, center = TRUE, scale = TRUE),
# zscore_K = scale(K, center = TRUE, scale = TRUE),
# zscore_r = scale(r, center = TRUE, scale = TRUE),
# zscore_AUC = scale(AUC, center = TRUE, scale = TRUE)
# )
# fwrite(zscores, file.path(out_dir, "zscores_interaction.csv"), row.names = FALSE)
# }
# generate_and_publish_qc <- function(df, delta_bg_tolerance, out_dir_qc) {
# variables <- c("L", "K", "r", "AUC", "delta_bg")
# # Pre-QC plots
# lapply(variables, function(var) {
# if (var %in% colnames(df)) {
# create_and_publish_plot(df, var, "scatter", out_dir_qc)
# }
# })
# # Filter data based on delta background tolerance for Post-QC analysis
# df_post_qc <- df %>%
# mutate(across(all_of(variables),
# ~ ifelse(delta_bg >= delta_bg_tolerance, NA, .)))
# # Post-QC plots
# lapply(variables, function(var) {
# if (var %in% colnames(df_post_qc)) {
# create_and_publish_plot(df_post_qc, var, "scatter", out_dir_qc, suffix = "_after_qc")
# }
# })
# # For plots specifically for data above the tolerance threshold
# delta_bg_above_tolerance <- df[df$delta_bg >= delta_bg_tolerance, ]
# lapply(variables, function(var) {
# if (var %in% colnames(delta_bg_above_tolerance)) {
# create_and_publish_plot(delta_bg_above_tolerance, var, "scatter", out_dir_qc, suffix = "_above_tolerance")
# }
# })
# }
# # Create rank plots
# create_rank_plots <- function(interaction_scores, out_dir) {
# rank_vars <- c("l_rank", "k_rank", "r_rank", "auc_rank")
# lapply(rank_vars, function(rank_var) {
# p <- ggplot(interaction_scores, aes(x = !!sym(rank_var))) +
# geom_bar() +
# ggtitle(paste("Rank Distribution for", rank_var)) +
# theme_publication()
# pdf_path <- file.path(out_dir, paste0("rank_distribution_", rank_var, ".pdf"))
# pdf(pdf_path, width = PLOT_WIDTH, height = PLOT_HEIGHT)
# print(p)
# dev.off()
# # Generate HTML output
# html_path <- sub(".pdf$", ".html", pdf_path)
# pgg <- suppressWarnings(ggplotly(p) %>%
# layout(legend = list(orientation = "h")))
# saveWidget(pgg, html_path, selfcontained = TRUE)
# })
# }
# create_correlation_plot <- function(interaction_scores, out_dir) {
# # Check for non-finite values and remove them from the dataset
# interaction_scores <- interaction_scores %>%
# filter_all(all_vars(is.finite(.)))
# # Generate correlation plots for each pair of variables
# pairs <- list(
# c("mean_L", "mean_K"),
# c("mean_L", "mean_r"),
# c("mean_L", "mean_AUC"),
# c("mean_K", "mean_r"),
# c("mean_K", "mean_AUC"),
# c("mean_r", "mean_AUC")
# )
# lapply(pairs, function(vars) {
# p <- ggplot(interaction_scores, aes(x = !!sym(vars[1]), y = !!sym(vars[2]))) +
# geom_point() +
# geom_smooth(method = "lm", se = FALSE) +
# ggtitle(paste("Correlation between", vars[1], "and", vars[2])) +
# theme_publication()
# pdf_path <- file.path(out_dir, paste0("correlation_", vars[1], "_", vars[2], ".pdf"))
# pdf(pdf_path, width = PLOT_WIDTH, height = PLOT_HEIGHT)
# print(p)
# dev.off()
# # Generate HTML output
# html_path <- sub(".pdf$", ".html", pdf_path)
# pgg <- suppressWarnings(ggplotly(p, tooltip = c(vars[1], vars[2])) %>%
# layout(legend = list(orientation = "h")))
# saveWidget(pgg, html_path, selfcontained = TRUE)
# })
# }
# process_experiment <- function(exp_name, exp_dir, genes, output_dir) {
# out_dir <- file.path(exp_dir, "zscores")
# out_dir_qc <- file.path(out_dir, "qc")
# dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)
# dir.create(out_dir_qc, showWarnings = FALSE)
# # Load and preprocess the data
# data <- load_and_preprocess_data(args$easy_results_file, genes)
# # Calculate delta background tolerance
# delta_bg_tolerance <- mean(data$delta_bg, na.rm = TRUE) + 3 * sd(data$delta_bg, na.rm = TRUE)
# # Generate and publish QC plots (both pre-QC and post-QC)
# generate_and_publish_qc(data, delta_bg_tolerance, out_dir_qc)
# # Process and publish summary stats, interaction scores, and z-scores
# variables <- c("L", "K", "r", "AUC", "delta_bg")
# publish_summary_stats(data, variables, out_dir)
# interaction_scores <- publish_interaction_scores(data, out_dir)
# publish_zscores(data, out_dir)
# # Generate rank plots and correlation plots
# create_rank_plots(interaction_scores, out_dir)
# create_correlation_plot(interaction_scores, out_dir)
# output_file <- file.path(out_dir, "zscores_interaction.csv")
# fwrite(data, output_file, row.names = FALSE)
# return(output_file)
# }
# # Process all experiments
# processed_files <- lapply(names(args$experiments), function(exp_name) {
# process_experiment(exp_name, args$experiments[[exp_name]], genes, args$out_dir)
# })
# # Combine results from all experiments if multiple experiments exist
# if (length(processed_files) > 1) {
# combined_data <- Reduce(function(x, y) {
# merge(fread(x), fread(y), by = "OrfRep", all = TRUE, allow.cartesian = TRUE)
# }, processed_files)
# combined_output_file <- file.path(args$out_dir, "zscores", "zscores_interaction_combined.csv")
# fwrite(combined_data, combined_output_file, row.names = FALSE)
# }