Cleanup calculate_interaction_zscores files
This commit is contained in:
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,945 +0,0 @@
|
||||
suppressMessages({
|
||||
library(ggplot2)
|
||||
library(plotly)
|
||||
library(htmlwidgets)
|
||||
library(dplyr)
|
||||
library(ggthemes)
|
||||
library(data.table)
|
||||
library(unix)
|
||||
})
|
||||
|
||||
options(warn = 2, max.print = 1000)
|
||||
options(width = 10000)
|
||||
|
||||
# Set the memory limit to 30GB (30 * 1024 * 1024 * 1024 bytes)
|
||||
soft_limit <- 30 * 1024 * 1024 * 1024
|
||||
hard_limit <- 30 * 1024 * 1024 * 1024
|
||||
rlimit_as(soft_limit, hard_limit)
|
||||
|
||||
# Constants for configuration
|
||||
plot_width <- 14
|
||||
plot_height <- 9
|
||||
base_size <- 14
|
||||
|
||||
parse_arguments <- function() {
|
||||
args <- if (interactive()) {
|
||||
c(
|
||||
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240116_jhartman2_DoxoHLD",
|
||||
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/apps/r/SGD_features.tab",
|
||||
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/easy/20240116_jhartman2_DoxoHLD/results_std.txt",
|
||||
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp1",
|
||||
"Experiment 1: Doxo versus HLD",
|
||||
3,
|
||||
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp2",
|
||||
"Experiment 2: HLD versus Doxo",
|
||||
3
|
||||
)
|
||||
} else {
|
||||
commandArgs(trailingOnly = TRUE)
|
||||
}
|
||||
|
||||
# Extract paths, names, and standard deviations
|
||||
paths <- args[seq(4, length(args), by = 3)]
|
||||
names <- args[seq(5, length(args), by = 3)]
|
||||
sds <- as.numeric(args[seq(6, length(args), by = 3)])
|
||||
|
||||
# Normalize paths
|
||||
normalized_paths <- normalizePath(paths, mustWork = FALSE)
|
||||
|
||||
# Create named list of experiments
|
||||
experiments <- list()
|
||||
for (i in seq_along(paths)) {
|
||||
experiments[[names[i]]] <- list(
|
||||
path = normalized_paths[i],
|
||||
sd = sds[i]
|
||||
)
|
||||
}
|
||||
|
||||
list(
|
||||
out_dir = normalizePath(args[1], mustWork = FALSE),
|
||||
sgd_gene_list = normalizePath(args[2], mustWork = FALSE),
|
||||
easy_results_file = normalizePath(args[3], mustWork = FALSE),
|
||||
experiments = experiments
|
||||
)
|
||||
}
|
||||
|
||||
args <- parse_arguments()
|
||||
|
||||
# Should we keep output in exp dirs or combine in the study output dir?
|
||||
# dir.create(file.path(args$out_dir, "zscores"), showWarnings = FALSE)
|
||||
# dir.create(file.path(args$out_dir, "zscores", "qc"), showWarnings = FALSE)
|
||||
|
||||
# Define themes and scales
|
||||
theme_publication <- function(base_size = 14, base_family = "sans", legend_position = "bottom") {
|
||||
theme_foundation <- ggplot2::theme_grey(base_size = base_size, base_family = base_family)
|
||||
|
||||
theme_foundation %+replace%
|
||||
theme(
|
||||
plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
|
||||
text = element_text(),
|
||||
panel.background = element_rect(colour = NA),
|
||||
plot.background = element_rect(colour = NA),
|
||||
panel.border = element_rect(colour = NA),
|
||||
axis.title = element_text(face = "bold", size = rel(1)),
|
||||
axis.title.y = element_text(angle = 90, vjust = 2),
|
||||
axis.title.x = element_text(vjust = -0.2),
|
||||
axis.line = element_line(colour = "black"),
|
||||
panel.grid.major = element_line(colour = "#f0f0f0"),
|
||||
panel.grid.minor = element_blank(),
|
||||
legend.key = element_rect(colour = NA),
|
||||
legend.position = legend_position,
|
||||
legend.direction = ifelse(legend_position == "right", "vertical", "horizontal"),
|
||||
plot.margin = unit(c(10, 5, 5, 5), "mm"),
|
||||
strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
|
||||
strip.text = element_text(face = "bold")
|
||||
)
|
||||
}
|
||||
|
||||
scale_fill_publication <- function(...) {
|
||||
discrete_scale("fill", "Publication", manual_pal(values = c(
|
||||
"#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
|
||||
"#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
|
||||
)), ...)
|
||||
}
|
||||
|
||||
scale_colour_publication <- function(...) {
|
||||
discrete_scale("colour", "Publication", manual_pal(values = c(
|
||||
"#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
|
||||
"#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
|
||||
)), ...)
|
||||
}
|
||||
|
||||
# Load the initial dataframe from the easy_results_file
|
||||
load_and_process_data <- function(easy_results_file, sd = 3) {
|
||||
df <- read.delim(easy_results_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE)
|
||||
|
||||
df <- df %>%
|
||||
filter(!(.[[1]] %in% c("", "Scan"))) %>%
|
||||
filter(!is.na(ORF) & ORF != "" & !Gene %in% c("BLANK", "Blank", "blank") & Drug != "BMH21") %>%
|
||||
# Rename columns
|
||||
rename(L = l, num = Num., AUC = AUC96, scan = Scan, last_bg = LstBackgrd, first_bg = X1stBackgrd) %>%
|
||||
mutate(
|
||||
across(c(Col, Row, num, L, K, r, scan, AUC, last_bg, first_bg), as.numeric),
|
||||
delta_bg = last_bg - first_bg,
|
||||
delta_bg_tolerance = mean(delta_bg, na.rm = TRUE) + (sd * sd(delta_bg, na.rm = TRUE)),
|
||||
NG = if_else(L == 0 & !is.na(L), 1, 0),
|
||||
DB = if_else(delta_bg >= delta_bg_tolerance, 1, 0),
|
||||
SM = 0,
|
||||
OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep), # should these be hardcoded?
|
||||
conc_num = as.numeric(gsub("[^0-9\\.]", "", Conc)),
|
||||
conc_num_factor = as.numeric(as.factor(conc_num)) - 1
|
||||
)
|
||||
|
||||
return(df)
|
||||
}
|
||||
|
||||
# Update Gene names using the SGD gene list
|
||||
update_gene_names <- function(df, sgd_gene_list) {
|
||||
# Load SGD gene list
|
||||
genes <- read.delim(file = sgd_gene_list,
|
||||
quote = "", header = FALSE,
|
||||
colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11)))
|
||||
|
||||
# Create a named vector for mapping ORF to GeneName
|
||||
gene_map <- setNames(genes$V5, genes$V4)
|
||||
# Vectorized match to find the GeneName from gene_map
|
||||
mapped_genes <- gene_map[df$ORF]
|
||||
# Replace NAs in mapped_genes with original Gene names (preserves existing Gene names if ORF is not found)
|
||||
updated_genes <- ifelse(is.na(mapped_genes) | df$OrfRep == "YDL227C", df$Gene, mapped_genes)
|
||||
# Ensure Gene is not left blank or incorrectly updated to "OCT1"
|
||||
df <- df %>%
|
||||
mutate(Gene = ifelse(updated_genes == "" | updated_genes == "OCT1", OrfRep, updated_genes))
|
||||
|
||||
return(df)
|
||||
}
|
||||
|
||||
# Calculate summary statistics for all variables
|
||||
calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "conc_num_factor")) {
|
||||
df <- df %>%
|
||||
mutate(across(all_of(variables), ~ ifelse(. == 0, NA, .)))
|
||||
|
||||
summary_stats <- df %>%
|
||||
group_by(across(all_of(group_vars))) %>%
|
||||
summarise(
|
||||
N = sum(!is.na(L)),
|
||||
across(all_of(variables), list(
|
||||
mean = ~mean(., na.rm = TRUE),
|
||||
median = ~median(., na.rm = TRUE),
|
||||
max = ~max(., na.rm = TRUE),
|
||||
min = ~min(., na.rm = TRUE),
|
||||
sd = ~sd(., na.rm = TRUE),
|
||||
se = ~sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)) - 1)
|
||||
), .names = "{.fn}_{.col}")
|
||||
)
|
||||
|
||||
df_cleaned <- df %>%
|
||||
select(-any_of(names(summary_stats)))
|
||||
|
||||
df_with_stats <- left_join(df_cleaned, summary_stats, by = group_vars)
|
||||
|
||||
return(list(summary_stats = summary_stats, df_with_stats = df_with_stats))
|
||||
}
|
||||
|
||||
|
||||
calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) {
|
||||
|
||||
# Calculate total concentration variables
|
||||
total_conc_num <- length(unique(df$conc_num))
|
||||
num_non_removed_concs <- total_conc_num - sum(df$DB, na.rm = TRUE) - 1
|
||||
|
||||
# Pull the background means and standard deviations from zero concentration
|
||||
bg_means <- list(
|
||||
L = df %>% filter(conc_num_factor == 0) %>% pull(mean_L) %>% first(),
|
||||
K = df %>% filter(conc_num_factor == 0) %>% pull(mean_K) %>% first(),
|
||||
r = df %>% filter(conc_num_factor == 0) %>% pull(mean_r) %>% first(),
|
||||
AUC = df %>% filter(conc_num_factor == 0) %>% pull(mean_AUC) %>% first()
|
||||
)
|
||||
|
||||
bg_sd <- list(
|
||||
L = df %>% filter(conc_num_factor == 0) %>% pull(sd_L) %>% first(),
|
||||
K = df %>% filter(conc_num_factor == 0) %>% pull(sd_K) %>% first(),
|
||||
r = df %>% filter(conc_num_factor == 0) %>% pull(sd_r) %>% first(),
|
||||
AUC = df %>% filter(conc_num_factor == 0) %>% pull(sd_AUC) %>% first()
|
||||
)
|
||||
|
||||
interaction_scores <- df %>%
|
||||
mutate(
|
||||
WT_L = df$mean_L,
|
||||
WT_K = df$mean_K,
|
||||
WT_r = df$mean_r,
|
||||
WT_AUC = df$mean_AUC,
|
||||
WT_sd_L = df$sd_L,
|
||||
WT_sd_K = df$sd_K,
|
||||
WT_sd_r = df$sd_r,
|
||||
WT_sd_AUC = df$sd_AUC
|
||||
) %>%
|
||||
group_by(across(all_of(group_vars)), conc_num, conc_num_factor) %>%
|
||||
mutate(
|
||||
N = sum(!is.na(L)),
|
||||
NG = sum(NG, na.rm = TRUE),
|
||||
DB = sum(DB, na.rm = TRUE),
|
||||
SM = sum(SM, na.rm = TRUE),
|
||||
across(all_of(variables), list(
|
||||
mean = ~mean(., na.rm = TRUE),
|
||||
median = ~median(., na.rm = TRUE),
|
||||
max = ~max(., na.rm = TRUE),
|
||||
min = ~min(., na.rm = TRUE),
|
||||
sd = ~sd(., na.rm = TRUE),
|
||||
se = ~sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)) - 1)
|
||||
), .names = "{.fn}_{.col}")
|
||||
) %>%
|
||||
ungroup()
|
||||
|
||||
interaction_scores <- interaction_scores %>%
|
||||
group_by(across(all_of(group_vars))) %>%
|
||||
mutate(
|
||||
Raw_Shift_L = mean_L[[1]] - bg_means$L,
|
||||
Raw_Shift_K = mean_K[[1]] - bg_means$K,
|
||||
Raw_Shift_r = mean_r[[1]] - bg_means$r,
|
||||
Raw_Shift_AUC = mean_AUC[[1]] - bg_means$AUC,
|
||||
Z_Shift_L = Raw_Shift_L[[1]] / df$sd_L[[1]],
|
||||
Z_Shift_K = Raw_Shift_K[[1]] / df$sd_K[[1]],
|
||||
Z_Shift_r = Raw_Shift_r[[1]] / df$sd_r[[1]],
|
||||
Z_Shift_AUC = Raw_Shift_AUC[[1]] / df$sd_AUC[[1]]
|
||||
)
|
||||
|
||||
interaction_scores <- interaction_scores %>%
|
||||
mutate(
|
||||
Exp_L = WT_L + Raw_Shift_L,
|
||||
Delta_L = mean_L - Exp_L,
|
||||
Exp_K = WT_K + Raw_Shift_K,
|
||||
Delta_K = mean_K - Exp_K,
|
||||
Exp_r = WT_r + Raw_Shift_r,
|
||||
Delta_r = mean_r - Exp_r,
|
||||
Exp_AUC = WT_AUC + Raw_Shift_AUC,
|
||||
Delta_AUC = mean_AUC - Exp_AUC
|
||||
)
|
||||
|
||||
interaction_scores <- interaction_scores %>%
|
||||
mutate(
|
||||
Delta_L = if_else(NG == 1, mean_L - WT_L, Delta_L),
|
||||
Delta_K = if_else(NG == 1, mean_K - WT_K, Delta_K),
|
||||
Delta_r = if_else(NG == 1, mean_r - WT_r, Delta_r),
|
||||
Delta_AUC = if_else(NG == 1, mean_AUC - WT_AUC, Delta_AUC),
|
||||
Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L)
|
||||
)
|
||||
|
||||
# Calculate linear models and interaction scores
|
||||
interaction_scores <- interaction_scores %>%
|
||||
mutate(
|
||||
lm_L = lm(Delta_L ~ conc_num_factor),
|
||||
lm_K = lm(Delta_K ~ conc_num_factor),
|
||||
lm_r = lm(Delta_r ~ conc_num_factor),
|
||||
lm_AUC = lm(Delta_AUC ~ conc_num_factor),
|
||||
Zscore_L = Delta_L / WT_sd_L,
|
||||
Zscore_K = Delta_K / WT_sd_K,
|
||||
Zscore_r = Delta_r / WT_sd_r,
|
||||
Zscore_AUC = Delta_AUC / WT_sd_AUC
|
||||
)
|
||||
|
||||
interaction_scores <- interaction_scores %>%
|
||||
mutate(
|
||||
Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE),
|
||||
Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE),
|
||||
Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE),
|
||||
Sum_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE)
|
||||
)
|
||||
|
||||
interaction_scores_all <- interaction_scores %>%
|
||||
mutate(
|
||||
Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs,
|
||||
Avg_Zscore_K = Sum_Zscore_K / num_non_removed_concs,
|
||||
Avg_Zscore_r = Sum_Zscore_r / (total_conc_num - 1),
|
||||
Avg_Zscore_AUC = Sum_Zscore_AUC / (total_conc_num - 1),
|
||||
lm_Score_L = max_conc * coef(lm_L)[2] + coef(lm_L)[1],
|
||||
lm_Score_K = max_conc * coef(lm_K)[2] + coef(lm_K)[1],
|
||||
lm_Score_r = max_conc * coef(lm_r)[2] + coef(lm_r)[1],
|
||||
lm_Score_AUC = max_conc * coef(lm_AUC)[2] + coef(lm_AUC)[1],
|
||||
r_squared_L = summary(lm_L)$r.squared,
|
||||
r_squared_K = summary(lm_K)$r.squared,
|
||||
r_squared_r = summary(lm_r)$r.squared,
|
||||
r_squared_AUC = summary(lm_AUC)$r.squared
|
||||
)
|
||||
|
||||
# Calculate Z_lm for each variable
|
||||
interaction_scores_all <- interaction_scores_all %>%
|
||||
mutate(
|
||||
Z_lm_L = (lm_Score_L - mean(lm_Score_L, na.rm = TRUE)) / sd(lm_Score_L, na.rm = TRUE),
|
||||
Z_lm_K = (lm_Score_K - mean(lm_Score_K, na.rm = TRUE)) / sd(lm_Score_K, na.rm = TRUE),
|
||||
Z_lm_r = (lm_Score_r - mean(lm_Score_r, na.rm = TRUE)) / sd(lm_Score_r, na.rm = TRUE),
|
||||
Z_lm_AUC = (lm_Score_AUC - mean(lm_Score_AUC, na.rm = TRUE)) / sd(lm_Score_AUC, na.rm = TRUE)
|
||||
)
|
||||
|
||||
# Arrange results by Z_lm_L and NG
|
||||
interaction_scores_all <- interaction_scores_all %>%
|
||||
arrange(desc(lm_Score_L)) %>%
|
||||
arrange(desc(NG)) %>%
|
||||
ungroup()
|
||||
|
||||
return(list(zscores_calculations = interaction_scores_all, zscores_interactions = interaction_scores))
|
||||
}
|
||||
|
||||
generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_layout = NULL) {
|
||||
|
||||
`%||%` <- function(a, b) if (!is.null(a)) a else b
|
||||
|
||||
# Generalized plot generation
|
||||
plots <- lapply(plot_configs, function(config) {
|
||||
|
||||
df <- config$df
|
||||
plot <- ggplot(df, aes(x = !!sym(config$x_var), y = !!sym(config$y_var), color = as.factor(!!sym(config$color_var))))
|
||||
|
||||
# Rank plots with SD annotations
|
||||
if (config$plot_type == "rank") {
|
||||
plot <- plot + geom_point(size = 0.1, shape = 3)
|
||||
|
||||
# Adding SD bands
|
||||
if (!is.null(config$sd_band)) {
|
||||
for (i in seq_len(config$sd_band)) {
|
||||
plot <- plot +
|
||||
annotate("rect", xmin = -Inf, xmax = Inf, ymin = i, ymax = Inf, fill = "#542788", alpha = 0.3) +
|
||||
annotate("rect", xmin = -Inf, xmax = Inf, ymin = -i, ymax = -Inf, fill = "orange", alpha = 0.3) +
|
||||
geom_hline(yintercept = c(-i, i), color = "gray")
|
||||
}
|
||||
}
|
||||
|
||||
# Optionally add enhancer/suppressor count annotations
|
||||
if (!is.null(config$enhancer_label)) {
|
||||
plot <- plot + annotate("text", x = config$enhancer_label$x,
|
||||
y = config$enhancer_label$y, label = config$enhancer_label$label) +
|
||||
annotate("text", x = config$suppressor_label$x,
|
||||
y = config$suppressor_label$y, label = config$suppressor_label$label)
|
||||
}
|
||||
}
|
||||
|
||||
# Correlation plots
|
||||
if (config$plot_type == "correlation") {
|
||||
plot <- plot + geom_point(shape = 3, color = "gray70") +
|
||||
geom_smooth(method = "lm", color = "tomato3") +
|
||||
annotate("text", x = 0, y = 0, label = config$correlation_text)
|
||||
}
|
||||
|
||||
# General scatter/boxplot/density handling
|
||||
if (!is.null(config$y_var)) {
|
||||
plot <- plot + aes(y = !!sym(config$y_var))
|
||||
|
||||
y_mean_col <- paste0("mean_", config$y_var)
|
||||
y_sd_col <- paste0("sd_", config$y_var)
|
||||
|
||||
if (config$y_var == "delta_bg" && config$plot_type == "scatter") {
|
||||
plot <- plot + geom_point(shape = 3, size = 0.2, position = "jitter") +
|
||||
geom_errorbar(aes(ymin = !!sym(y_mean_col) - !!sym(y_sd_col),
|
||||
ymax = !!sym(y_mean_col) + !!sym(y_sd_col)), width = 0.1) +
|
||||
geom_point(aes(y = !!sym(y_mean_col)), size = 0.6)
|
||||
} else if (config$error_bar %||% FALSE) {
|
||||
plot <- plot +
|
||||
geom_point(shape = 3, size = 0.2) +
|
||||
geom_errorbar(aes(ymin = !!sym(y_mean_col) - !!sym(y_sd_col),
|
||||
ymax = !!sym(y_mean_col) + !!sym(y_sd_col)), width = 0.1) +
|
||||
geom_point(aes(y = !!sym(y_mean_col)), size = 0.6)
|
||||
}
|
||||
}
|
||||
|
||||
# Plot type selection
|
||||
plot <- switch(config$plot_type,
|
||||
"box" = plot + geom_boxplot(),
|
||||
"density" = plot + geom_density(),
|
||||
"bar" = plot + geom_bar(stat = "identity"),
|
||||
plot + geom_point() + geom_smooth(method = "lm", se = FALSE))
|
||||
|
||||
# Adding y-limits if provided
|
||||
if (!is.null(config$ylim_vals)) {
|
||||
plot <- plot + coord_cartesian(ylim = config$ylim_vals)
|
||||
}
|
||||
|
||||
# Setting legend position, titles, and labels
|
||||
legend_position <- config$legend_position %||% "bottom"
|
||||
plot <- plot + ggtitle(config$title) + theme_Publication(legend_position = legend_position)
|
||||
|
||||
if (!is.null(config$x_label)) plot <- plot + xlab(config$x_label)
|
||||
if (!is.null(config$y_label)) plot <- plot + ylab(config$y_label)
|
||||
|
||||
# Adding text annotations if provided
|
||||
if (!is.null(config$annotations)) {
|
||||
for (annotation in config$annotations) {
|
||||
plot <- plot + annotate("text", x = annotation$x, y = annotation$y, label = annotation$label)
|
||||
}
|
||||
}
|
||||
|
||||
return(plot)
|
||||
})
|
||||
|
||||
# If grid_layout is provided, arrange plots in a grid and save in a single PDF
|
||||
if (!is.null(grid_layout)) {
|
||||
pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9)
|
||||
|
||||
# Loop through plots in chunks defined by ncol and nrow
|
||||
for (start_idx in seq(1, length(plots), by = grid_layout$ncol * grid_layout$nrow)) {
|
||||
end_idx <- min(start_idx + grid_layout$ncol * grid_layout$nrow - 1, length(plots))
|
||||
plot_subset <- plots[start_idx:end_idx]
|
||||
|
||||
# Arrange plots in a grid
|
||||
do.call(grid.arrange, c(plot_subset, ncol = grid_layout$ncol, nrow = grid_layout$nrow))
|
||||
}
|
||||
|
||||
dev.off()
|
||||
|
||||
# Save as HTML (optional)
|
||||
plotly_plots <- lapply(plots, function(plot) {
|
||||
suppressWarnings(ggplotly(plot) %>% layout(legend = list(orientation = "h")))
|
||||
})
|
||||
combined_plot <- subplot(plotly_plots, nrows = grid_layout$nrow, margin = 0.05)
|
||||
saveWidget(combined_plot, file = file.path(output_dir, paste0(file_name, "_grid.html")), selfcontained = TRUE)
|
||||
|
||||
} else {
|
||||
# Save individual plots to PDF
|
||||
pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9)
|
||||
lapply(plots, print)
|
||||
dev.off()
|
||||
|
||||
# Convert plots to plotly and save as HTML
|
||||
plotly_plots <- lapply(plots, function(plot) {
|
||||
suppressWarnings(ggplotly(plot) %>% layout(legend = list(orientation = "h")))
|
||||
})
|
||||
combined_plot <- subplot(plotly_plots, nrows = length(plotly_plots), margin = 0.05)
|
||||
saveWidget(combined_plot, file = file.path(output_dir, paste0(file_name, ".html")), selfcontained = TRUE)
|
||||
}
|
||||
}
|
||||
|
||||
generate_interaction_plot_configs <- function(df, variables) {
|
||||
plot_configs <- list()
|
||||
|
||||
for (variable in variables) {
|
||||
# Define the y-limits based on the variable being plotted
|
||||
ylim_vals <- switch(variable,
|
||||
"L" = c(-65, 65),
|
||||
"K" = c(-65, 65),
|
||||
"r" = c(-0.65, 0.65),
|
||||
"AUC" = c(-6500, 6500)
|
||||
)
|
||||
|
||||
# Dynamically generate the column names for standard deviation and delta
|
||||
wt_sd_col <- paste0("WT_sd_", variable)
|
||||
delta_var <- paste0("Delta_", variable)
|
||||
z_shift <- paste0("Z_Shift_", variable)
|
||||
z_lm <- paste0("Z_lm_", variable)
|
||||
|
||||
# Set annotations for ZShift, Z lm Score, NG, DB, SM
|
||||
annotations <- list(
|
||||
list(x = 1, y = ifelse(variable == "L", 45, ifelse(variable == "K", 45,
|
||||
ifelse(variable == "r", 0.45, 4500))), label = paste("ZShift =", round(df[[z_shift]], 2))),
|
||||
list(x = 1, y = ifelse(variable == "L", 25, ifelse(variable == "K", 25,
|
||||
ifelse(variable == "r", 0.25, 2500))), label = paste("lm ZScore =", round(df[[z_lm]], 2))),
|
||||
list(x = 1, y = ifelse(variable == "L", -25, ifelse(variable == "K", -25,
|
||||
ifelse(variable == "r", -0.25, -2500))), label = paste("NG =", df$NG)),
|
||||
list(x = 1, y = ifelse(variable == "L", -35, ifelse(variable == "K", -35,
|
||||
ifelse(variable == "r", -0.35, -3500))), label = paste("DB =", df$DB)),
|
||||
list(x = 1, y = ifelse(variable == "L", -45, ifelse(variable == "K", -45,
|
||||
ifelse(variable == "r", -0.45, -4500))), label = paste("SM =", df$SM))
|
||||
)
|
||||
|
||||
# Add scatter plot configuration for this variable
|
||||
plot_configs[[length(plot_configs) + 1]] <- list(
|
||||
df = df,
|
||||
x_var = "conc_num_factor",
|
||||
y_var = delta_var,
|
||||
plot_type = "scatter",
|
||||
title = sprintf("%s %s", df$OrfRep[1], df$Gene[1]),
|
||||
ylim_vals = ylim_vals,
|
||||
annotations = annotations,
|
||||
error_bar = list(
|
||||
ymin = 0 - (2 * df[[wt_sd_col]][1]),
|
||||
ymax = 0 + (2 * df[[wt_sd_col]][1])
|
||||
),
|
||||
x_breaks = unique(df$conc_num_factor),
|
||||
x_labels = unique(as.character(df$conc_num)),
|
||||
x_label = unique(df$Drug[1])
|
||||
)
|
||||
|
||||
# Add box plot configuration for this variable
|
||||
plot_configs[[length(plot_configs) + 1]] <- list(
|
||||
df = df,
|
||||
x_var = "conc_num_factor",
|
||||
y_var = variable,
|
||||
plot_type = "box",
|
||||
title = sprintf("%s %s (Boxplot)", df$OrfRep[1], df$Gene[1]),
|
||||
ylim_vals = ylim_vals,
|
||||
annotations = annotations,
|
||||
error_bar = FALSE, # Boxplots typically don't need error bars
|
||||
x_breaks = unique(df$conc_num_factor),
|
||||
x_labels = unique(as.character(df$conc_num)),
|
||||
x_label = unique(df$Drug[1])
|
||||
)
|
||||
}
|
||||
|
||||
return(plot_configs)
|
||||
}
|
||||
|
||||
generate_rank_plot_configs <- function(df, rank_var, zscore_var, label_prefix) {
|
||||
configs <- list()
|
||||
|
||||
for (sd_band in c(1, 2, 3)) {
|
||||
# Annotated version
|
||||
configs[[length(configs) + 1]] <- list(
|
||||
df = df,
|
||||
x_var = rank_var,
|
||||
y_var = zscore_var,
|
||||
plot_type = "rank",
|
||||
title = paste("Average Z score vs. Rank for", label_prefix, "above", sd_band, "SD"),
|
||||
sd_band = sd_band,
|
||||
enhancer_label = list(
|
||||
x = nrow(df) / 2, y = 10,
|
||||
label = paste("Deletion Enhancers =", nrow(df[df[[zscore_var]] >= sd_band, ]))
|
||||
),
|
||||
suppressor_label = list(
|
||||
x = nrow(df) / 2, y = -10,
|
||||
label = paste("Deletion Suppressors =", nrow(df[df[[zscore_var]] <= -sd_band, ]))
|
||||
)
|
||||
)
|
||||
|
||||
# Non-annotated version
|
||||
configs[[length(configs) + 1]] <- list(
|
||||
df = df,
|
||||
x_var = rank_var,
|
||||
y_var = zscore_var,
|
||||
plot_type = "rank",
|
||||
title = paste("Average Z score vs. Rank for", label_prefix, "above", sd_band, "SD"),
|
||||
sd_band = sd_band
|
||||
)
|
||||
}
|
||||
|
||||
return(configs)
|
||||
}
|
||||
|
||||
generate_correlation_plot_configs <- function(df, lm_list, lm_summaries) {
|
||||
lapply(seq_along(lm_list), function(i) {
|
||||
r_squared <- round(lm_summaries[[i]]$r.squared, 3)
|
||||
list(
|
||||
x_var = names(lm_list)[i][1],
|
||||
y_var = names(lm_list)[i][2],
|
||||
plot_type = "scatter",
|
||||
title = paste("Correlation between", names(lm_list)[i][1], "and", names(lm_list)[i][2]),
|
||||
annotations = list(list(x = 0, y = 0, label = paste("R-squared =", r_squared)))
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
# Adjust missing values and calculate ranks
|
||||
adjust_missing_and_rank <- function(df, variables) {
|
||||
|
||||
# Adjust missing values in Avg_Zscore and Z_lm columns, and apply rank to the specified variables
|
||||
df <- df %>%
|
||||
mutate(across(all_of(variables), list(
|
||||
Avg_Zscore = ~ if_else(is.na(get(paste0("Avg_Zscore_", cur_column()))), 0.001, get(paste0("Avg_Zscore_", cur_column()))),
|
||||
Z_lm = ~ if_else(is.na(get(paste0("Z_lm_", cur_column()))), 0.001, get(paste0("Z_lm_", cur_column()))),
|
||||
Rank = ~ rank(get(paste0("Avg_Zscore_", cur_column()))),
|
||||
Rank_lm = ~ rank(get(paste0("Z_lm_", cur_column())))
|
||||
), .names = "{fn}_{col}"))
|
||||
|
||||
return(df)
|
||||
}
|
||||
|
||||
main <- function() {
|
||||
lapply(names(args$experiments), function(exp_name) {
|
||||
exp <- args$experiments[[exp_name]]
|
||||
exp_path <- exp$path
|
||||
exp_sd <- exp$sd
|
||||
out_dir <- file.path(exp_path, "zscores")
|
||||
out_dir_qc <- file.path(exp_path, "zscores", "qc")
|
||||
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
|
||||
dir.create(out_dir_qc, recursive = TRUE, showWarnings = FALSE)
|
||||
|
||||
# Load and process data
|
||||
df <- load_and_process_data(args$easy_results_file, sd = exp_sd)
|
||||
df <- update_gene_names(df, args$sgd_gene_list)
|
||||
|
||||
max_conc <- max(df$conc_num_factor)
|
||||
|
||||
# QC steps and filtering
|
||||
df_above_tolerance <- df %>% filter(DB == 1)
|
||||
|
||||
# Calculate the half-medians for `L` and `K` for rows above tolerance
|
||||
L_half_median <- (median(df_above_tolerance$L, na.rm = TRUE)) / 2
|
||||
K_half_median <- (median(df_above_tolerance$K, na.rm = TRUE)) / 2
|
||||
|
||||
# Get the number of rows that are above tolerance
|
||||
rows_to_remove <- nrow(df_above_tolerance)
|
||||
|
||||
# Set L, r, K, and AUC to NA for rows that are above tolerance
|
||||
df_na <- df %>% mutate(across(c(L, r, AUC, K), ~ ifelse(DB == 1, NA, .)))
|
||||
|
||||
# Calculate summary statistics for all strains, including both background and the deletions
|
||||
message("Calculating summary statistics for all strains")
|
||||
variables <- c("L", "K", "r", "AUC")
|
||||
ss <- calculate_summary_stats(df_na, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
|
||||
summary_stats <- ss$summary_stats
|
||||
df_na_stats <- ss$df_with_stats
|
||||
write.csv(summary_stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE)
|
||||
|
||||
print("Summary stats:")
|
||||
print(head(summary_stats), width = 200)
|
||||
|
||||
# Remove rows with 0 values in L
|
||||
df_no_zeros <- df_na %>% filter(L > 0)
|
||||
|
||||
# Additional filtering for non-finite values
|
||||
df_na_filtered <- df_na %>%
|
||||
filter(if_any(c(L), ~ !is.finite(.))) %>%
|
||||
{
|
||||
if (nrow(.) > 0) {
|
||||
message("Removing non-finite rows:\n")
|
||||
print(head(., n = 10))
|
||||
}
|
||||
df_na %>% filter(if_all(c(L), is.finite))
|
||||
}
|
||||
|
||||
# Filter data within and outside 2SD
|
||||
message("Filtering by 2SD of K")
|
||||
df_na_within_2sd_k <- df_na_stats %>%
|
||||
filter(K >= (mean_K - 2 * sd_K) & K <= (mean_K + 2 * sd_K))
|
||||
df_na_outside_2sd_k <- df_na_stats %>%
|
||||
filter(K < (mean_K - 2 * sd_K) | K > (mean_K + 2 * sd_K))
|
||||
|
||||
# Summary statistics for within and outside 2SD of K
|
||||
message("Calculating summary statistics for L within 2SD of K")
|
||||
# TODO We're omitting the original z_max calculation, not sure if needed?
|
||||
ss <- calculate_summary_stats(df_na_within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
|
||||
l_within_2sd_k_stats <- ss$summary_stats
|
||||
df_na_l_within_2sd_k_stats <- ss$df_with_stats
|
||||
message("Calculating summary statistics for L outside 2SD of K")
|
||||
ss <- calculate_summary_stats(df_na_outside_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
|
||||
l_outside_2sd_k_stats <- ss$summary_stats
|
||||
df_na_l_outside_2sd_k_stats <- ss$df_with_stats
|
||||
# Write CSV files
|
||||
write.csv(l_within_2sd_k_stats, file = file.path(out_dir_qc, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"), row.names = FALSE)
|
||||
write.csv(l_outside_2sd_k_stats, file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_outside_2sd_k.csv"), row.names = FALSE)
|
||||
|
||||
# Plots
|
||||
|
||||
# Print quality control graphs before removing data due to contamination and
|
||||
# adjusting missing data to max theoretical values
|
||||
l_vs_k_plots <- list(
|
||||
list(df = df, x_var = "L", y_var = "K", plot_type = "scatter",
|
||||
title = "Raw L vs K before QC",
|
||||
color_var = "conc_num",
|
||||
legend_position = "right"
|
||||
)
|
||||
)
|
||||
|
||||
above_threshold_plots <- list(
|
||||
list(df = df_above_tolerance, x_var = "L", y_var = "K", plot_type = "scatter",
|
||||
title = paste("Raw L vs K for strains above delta background threshold of", df_above_tolerance$delta_bg_tolerance[[1]], "or above"),
|
||||
color_var = "conc_num",
|
||||
annotations = list(
|
||||
list(
|
||||
x = L_half_median,
|
||||
y = K_half_median,
|
||||
label = paste("Strains above delta background tolerance =", nrow(df_above_tolerance))
|
||||
)
|
||||
),
|
||||
error_bar = FALSE,
|
||||
legend_position = "right"
|
||||
)
|
||||
)
|
||||
|
||||
frequency_delta_bg_plots <- list(
|
||||
list(df = df, x_var = "delta_bg", y_var = NULL, plot_type = "density",
|
||||
title = "Density plot for Delta Background by Conc All Data",
|
||||
color_var = "conc_num",
|
||||
x_label = "Delta Background",
|
||||
y_label = "Density",
|
||||
error_bar = FALSE,
|
||||
legend_position = "right"
|
||||
),
|
||||
list(df = df, x_var = "delta_bg", y_var = NULL, plot_type = "bar",
|
||||
title = "Bar plot for Delta Background by Conc All Data",
|
||||
color_var = "conc_num",
|
||||
x_label = "Delta Background",
|
||||
y_label = "Count",
|
||||
error_bar = FALSE,
|
||||
legend_position = "right"
|
||||
)
|
||||
)
|
||||
|
||||
plate_analysis_plots <- list()
|
||||
for (plot_type in c("scatter", "box")) {
|
||||
variables <- c("L", "K", "r", "AUC", "delta_bg")
|
||||
for (var in variables) {
|
||||
for (stage in c("before", "after")) {
|
||||
if (stage == "before") {
|
||||
df_plot <- df
|
||||
} else {
|
||||
df_plot <- df_na # TODO use df_na_filtered if necessary
|
||||
}
|
||||
|
||||
# Set error_bar = TRUE only for scatter plots
|
||||
error_bar <- ifelse(plot_type == "scatter", TRUE, FALSE)
|
||||
|
||||
# Create the plot configuration
|
||||
plot_config <- list(df = df_plot, x_var = "scan", y_var = var, plot_type = plot_type,
|
||||
title = paste("Plate analysis by Drug Conc for", var, stage, "quality control"),
|
||||
error_bar = error_bar, color_var = "conc_num")
|
||||
|
||||
plate_analysis_plots <- append(plate_analysis_plots, list(plot_config))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
plate_analysis_no_zero_plots <- list()
|
||||
for (plot_type in c("scatter", "box")) {
|
||||
variables <- c("L", "K", "r", "AUC", "delta_bg")
|
||||
for (var in variables) {
|
||||
|
||||
# Set error_bar = TRUE only for scatter plots
|
||||
error_bar <- ifelse(plot_type == "scatter", TRUE, FALSE)
|
||||
|
||||
# Create the plot configuration
|
||||
plot_config <- list(
|
||||
df = df_no_zeros,
|
||||
x_var = "scan",
|
||||
y_var = var,
|
||||
plot_type = plot_type,
|
||||
title = paste("Plate analysis by Drug Conc for", var, "after quality control"),
|
||||
error_bar = error_bar,
|
||||
color_var = "conc_num"
|
||||
)
|
||||
plate_analysis_plots <- append(plate_analysis_plots, list(plot_config))
|
||||
}
|
||||
}
|
||||
|
||||
l_outside_2sd_k_plots <- list(
|
||||
list(df = X_outside_2SD_K, x_var = "l", y_var = "K", plot_type = "scatter",
|
||||
title = "Raw L vs K for strains falling outside 2SD of the K mean at each Conc",
|
||||
color_var = "conc_num",
|
||||
legend_position = "right"
|
||||
)
|
||||
)
|
||||
|
||||
delta_bg_outside_2sd_k_plots <- list(
|
||||
list(df = X_outside_2SD_K, x_var = "delta_bg", y_var = "K", plot_type = "scatter",
|
||||
title = "Delta Background vs K for strains falling outside 2SD of the K mean at each Conc",
|
||||
color_var = "conc_num",
|
||||
legend_position = "right"
|
||||
)
|
||||
)
|
||||
|
||||
# Generate and save plots for each QC step
|
||||
message("Generating QC plots")
|
||||
generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots)
|
||||
generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots)
|
||||
generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots)
|
||||
generate_and_save_plots(out_dir_qc, "plate_analysis", plate_analysis_plots)
|
||||
generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros", plate_analysis_no_zeros_plots)
|
||||
generate_and_save_plots(out_dir_qc, "L_vs_K_for_strains_2SD_outside_mean_K", l_outside_2sd_k_plots)
|
||||
generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots)
|
||||
|
||||
# Clean up
|
||||
rm(df, df_above_tolerance, df_no_zeros)
|
||||
|
||||
# TODO: Originally this filtered L NA's
|
||||
# Let's try to avoid for now since stats have already been calculated
|
||||
|
||||
# Process background strains
|
||||
bg_strains <- c("YDL227C")
|
||||
lapply(bg_strains, function(strain) {
|
||||
|
||||
message("Processing background strain: ", strain)
|
||||
|
||||
# Handle missing data by setting zero values to NA
|
||||
# and then removing any rows with NA in L col
|
||||
df_bg <- df_na %>%
|
||||
filter(OrfRep == strain) %>%
|
||||
mutate(
|
||||
L = if_else(L == 0, NA, L),
|
||||
K = if_else(K == 0, NA, K),
|
||||
r = if_else(r == 0, NA, r),
|
||||
AUC = if_else(AUC == 0, NA, AUC)
|
||||
) %>%
|
||||
filter(!is.na(L))
|
||||
|
||||
# Recalculate summary statistics for the background strain
|
||||
message("Calculating summary statistics for background strain")
|
||||
ss <- calculate_summary_stats(df_bg, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
|
||||
summary_stats_bg <- ss$summary_stats
|
||||
df_bg_stats <- ss$df_with_stats
|
||||
write.csv(summary_stats_bg,
|
||||
file = file.path(out_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")),
|
||||
row.names = FALSE)
|
||||
|
||||
# Filter reference and deletion strains
|
||||
# Formerly X2_RF (reference strains)
|
||||
df_reference <- df_na_stats %>%
|
||||
filter(OrfRep == strain) %>%
|
||||
mutate(SM = 0)
|
||||
|
||||
# Formerly X2 (deletion strains)
|
||||
df_deletion <- df_na_stats %>%
|
||||
filter(OrfRep != strain) %>%
|
||||
mutate(SM = 0)
|
||||
|
||||
# Set the missing values to the highest theoretical value at each drug conc for L
|
||||
# Leave other values as 0 for the max/min
|
||||
reference_strain <- df_reference %>%
|
||||
group_by(conc_num) %>%
|
||||
mutate(
|
||||
max_l_theoretical = max(max_L, na.rm = TRUE),
|
||||
L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L),
|
||||
SM = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, 1, SM),
|
||||
L = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, max_l_theoretical, L)) %>%
|
||||
ungroup()
|
||||
|
||||
# Ditto for deletion strains
|
||||
deletion_strains <- df_deletion %>%
|
||||
group_by(conc_num) %>%
|
||||
mutate(
|
||||
max_l_theoretical = max(max_L, na.rm = TRUE),
|
||||
L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L),
|
||||
SM = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, 1, SM),
|
||||
L = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, max_l_theoretical, L)) %>%
|
||||
ungroup()
|
||||
|
||||
# Calculate interactions
|
||||
variables <- c("L", "K", "r", "AUC")
|
||||
message("Calculating interaction scores")
|
||||
print("Reference strain:")
|
||||
print(head(reference_strain))
|
||||
reference_results <- calculate_interaction_scores(reference_strain, max_conc, variables)
|
||||
print("Deletion strains:")
|
||||
print(head(deletion_strains))
|
||||
deletion_results <- calculate_interaction_scores(deletion_strains, max_conc, variables)
|
||||
|
||||
zscores_calculations_reference <- reference_results$zscores_calculations
|
||||
zscores_interactions_reference <- reference_results$zscores_interactions
|
||||
zscores_calculations <- deletion_results$zscores_calculations
|
||||
zscores_interactions <- deletion_results$zscores_interactions
|
||||
|
||||
# Writing Z-Scores to file
|
||||
write.csv(zscores_calculations_reference, file = file.path(out_dir, "RF_ZScores_Calculations.csv"), row.names = FALSE)
|
||||
write.csv(zscores_calculations, file = file.path(out_dir, "ZScores_Calculations.csv"), row.names = FALSE)
|
||||
write.csv(zscores_interactions_reference, file = file.path(out_dir, "RF_ZScores_Interaction.csv"), row.names = FALSE)
|
||||
write.csv(zscores_interactions, file = file.path(out_dir, "ZScores_Interaction.csv"), row.names = FALSE)
|
||||
|
||||
# Create interaction plots
|
||||
reference_plot_configs <- generate_interaction_plot_configs(df_reference, variables)
|
||||
deletion_plot_configs <- generate_interaction_plot_configs(df_deletion, variables)
|
||||
generate_and_save_plots(out_dir, "RF_interactionPlots", reference_plot_configs, grid_layout = list(ncol = 4, nrow = 3))
|
||||
generate_and_save_plots(out_dir, "InteractionPlots", deletion_plot_configs, grid_layout = list(ncol = 4, nrow = 3))
|
||||
|
||||
# Define conditions for enhancers and suppressors
|
||||
# TODO Add to study config file?
|
||||
threshold <- 2
|
||||
enhancer_condition_L <- zscores_interactions$Avg_Zscore_L >= threshold
|
||||
suppressor_condition_L <- zscores_interactions$Avg_Zscore_L <= -threshold
|
||||
enhancer_condition_K <- zscores_interactions$Avg_Zscore_K >= threshold
|
||||
suppressor_condition_K <- zscores_interactions$Avg_Zscore_K <= -threshold
|
||||
|
||||
# Subset data
|
||||
enhancers_L <- zscores_interactions[enhancer_condition_L, ]
|
||||
suppressors_L <- zscores_interactions[suppressor_condition_L, ]
|
||||
enhancers_K <- zscores_interactions[enhancer_condition_K, ]
|
||||
suppressors_K <- zscores_interactions[suppressor_condition_K, ]
|
||||
|
||||
# Save enhancers and suppressors
|
||||
message("Writing enhancer/suppressor csv files")
|
||||
write.csv(enhancers_L, file = file.path(out_dir, "ZScores_Interaction_Deletion_Enhancers_L.csv"), row.names = FALSE)
|
||||
write.csv(suppressors_L, file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_L.csv"), row.names = FALSE)
|
||||
write.csv(enhancers_K, file = file.path(out_dir, "ZScores_Interaction_Deletion_Enhancers_K.csv"), row.names = FALSE)
|
||||
write.csv(suppressors_K, file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K.csv"), row.names = FALSE)
|
||||
|
||||
# Combine conditions for enhancers and suppressors
|
||||
enhancers_and_suppressors_L <- zscores_interactions[enhancer_condition_L | suppressor_condition_L, ]
|
||||
enhancers_and_suppressors_K <- zscores_interactions[enhancer_condition_K | suppressor_condition_K, ]
|
||||
|
||||
# Save combined enhancers and suppressors
|
||||
write.csv(enhancers_and_suppressors_L,
|
||||
file = file.path(out_dir, "ZScores_Interaction_Deletion_Enhancers_and_Suppressors_L.csv"), row.names = FALSE)
|
||||
write.csv(enhancers_and_suppressors_K,
|
||||
file = file.path(out_dir, "ZScores_Interaction_Deletion_Enhancers_and_Suppressors_K.csv"), row.names = FALSE)
|
||||
|
||||
# Handle linear model based enhancers and suppressors
|
||||
lm_threshold <- 2
|
||||
enhancers_lm_L <- zscores_interactions[zscores_interactions$Z_lm_L >= lm_threshold, ]
|
||||
suppressors_lm_L <- zscores_interactions[zscores_interactions$Z_lm_L <= -lm_threshold, ]
|
||||
enhancers_lm_K <- zscores_interactions[zscores_interactions$Z_lm_K >= lm_threshold, ]
|
||||
suppressors_lm_K <- zscores_interactions[zscores_interactions$Z_lm_K <= -lm_threshold, ]
|
||||
|
||||
# Save linear model based enhancers and suppressors
|
||||
message("Writing linear model enhancer/suppressor csv files")
|
||||
write.csv(enhancers_lm_L,
|
||||
file = file.path(out_dir, "ZScores_Interaction_Deletion_Enhancers_L_lm.csv"), row.names = FALSE)
|
||||
write.csv(suppressors_lm_L,
|
||||
file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_L_lm.csv"), row.names = FALSE)
|
||||
write.csv(enhancers_lm_K,
|
||||
file = file.path(out_dir, "ZScores_Interaction_Deletion_Enhancers_K_lm.csv"), row.names = FALSE)
|
||||
write.csv(suppressors_lm_K,
|
||||
file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K_lm.csv"), row.names = FALSE)
|
||||
|
||||
zscores_interactions_adjusted <- adjust_missing_and_rank(zscores_interactions)
|
||||
|
||||
# Generate all rank plot configurations for L and K
|
||||
rank_plot_configs <- c(
|
||||
generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_L", "Avg_Zscore_L", "L"),
|
||||
generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_K", "Avg_Zscore_K", "K")
|
||||
)
|
||||
|
||||
# Generate and save rank plots
|
||||
generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots",
|
||||
plot_configs = rank_plot_config, grid_layout = list(ncol = 3, nrow = 2))
|
||||
|
||||
# # Correlation plots
|
||||
# lm_list <- list(
|
||||
# lm(Z_lm_K ~ Z_lm_L, data = zscores_interactions_filtered),
|
||||
# lm(Z_lm_r ~ Z_lm_L, data = zscores_interactions_filtered),
|
||||
# lm(Z_lm_AUC ~ Z_lm_L, data = zscores_interactions_filtered),
|
||||
# lm(Z_lm_r ~ Z_lm_K, data = zscores_interactions_filtered),
|
||||
# lm(Z_lm_AUC ~ Z_lm_K, data = zscores_interactions_filtered),
|
||||
# lm(Z_lm_AUC ~ Z_lm_r, data = zscores_interactions_filtered)
|
||||
# )
|
||||
|
||||
lm_summaries <- lapply(lm_list, summary)
|
||||
correlation_plot_configs <- generate_correlation_plot_configs(zscores_interactions_filtered, lm_list, lm_summaries)
|
||||
generate_and_save_plots(zscores_interactions_filtered, output_dir, correlation_plot_configs)
|
||||
})
|
||||
})
|
||||
}
|
||||
main()
|
||||
@@ -1498,7 +1498,7 @@ calculate_interaction_zscores() {
|
||||
* Background values are reported in the results sheet and so could also be analyzed there.
|
||||
EOF
|
||||
|
||||
declare script="$APPS_DIR/r/calculate_interaction_zscores5.R"
|
||||
declare script="$APPS_DIR/r/calculate_interaction_zscores.R"
|
||||
declare -a out_paths=("${1:-"$STUDY_RESULTS_DIR/zscores"}")
|
||||
for path in "${EXP_PATHS[@]}"; do
|
||||
out_paths+=("${path}/zscores")
|
||||
|
||||
Reference in New Issue
Block a user