|
@@ -37,28 +37,30 @@ parse_arguments <- function() {
|
|
} else {
|
|
} else {
|
|
commandArgs(trailingOnly = TRUE)
|
|
commandArgs(trailingOnly = TRUE)
|
|
}
|
|
}
|
|
-
|
|
|
|
- # Extract paths, names, and standard deviations
|
|
|
|
- paths <- args[seq(4, length(args), by = 3)]
|
|
|
|
- names <- args[seq(5, length(args), by = 3)]
|
|
|
|
- sds <- as.numeric(args[seq(6, length(args), by = 3)])
|
|
|
|
-
|
|
|
|
- # Normalize paths
|
|
|
|
- normalized_paths <- normalizePath(paths, mustWork = FALSE)
|
|
|
|
-
|
|
|
|
- # Create named list of experiments
|
|
|
|
|
|
+
|
|
|
|
+ out_dir <- normalizePath(args[1], mustWork = FALSE)
|
|
|
|
+ sgd_gene_list <- normalizePath(args[2], mustWork = FALSE)
|
|
|
|
+ easy_results_file <- normalizePath(args[3], mustWork = FALSE)
|
|
|
|
+
|
|
|
|
+ # The remaining arguments should be in groups of 3
|
|
|
|
+ exp_args <- args[-(1:3)]
|
|
|
|
+ if (length(exp_args) %% 3 != 0) {
|
|
|
|
+ stop("Experiment arguments should be in groups of 3: path, name, sd.")
|
|
|
|
+ }
|
|
|
|
+
|
|
experiments <- list()
|
|
experiments <- list()
|
|
- for (i in seq_along(paths)) {
|
|
|
|
- experiments[[names[i]]] <- list(
|
|
|
|
- path = normalized_paths[i],
|
|
|
|
- sd = sds[i]
|
|
|
|
|
|
+ for (i in seq(1, length(exp_args), by = 3)) {
|
|
|
|
+ exp_name <- exp_args[i + 1]
|
|
|
|
+ experiments[[exp_name]] <- list(
|
|
|
|
+ path = normalizePath(exp_args[i], mustWork = FALSE),
|
|
|
|
+ sd = as.numeric(exp_args[i + 2])
|
|
)
|
|
)
|
|
}
|
|
}
|
|
-
|
|
|
|
|
|
+
|
|
list(
|
|
list(
|
|
- out_dir = normalizePath(args[1], mustWork = FALSE),
|
|
|
|
- sgd_gene_list = normalizePath(args[2], mustWork = FALSE),
|
|
|
|
- easy_results_file = normalizePath(args[3], mustWork = FALSE),
|
|
|
|
|
|
+ out_dir = out_dir,
|
|
|
|
+ sgd_gene_list = sgd_gene_list,
|
|
|
|
+ easy_results_file = easy_results_file,
|
|
experiments = experiments
|
|
experiments = experiments
|
|
)
|
|
)
|
|
}
|
|
}
|
|
@@ -81,9 +83,11 @@ theme_publication <- function(base_size = 14, base_family = "sans", legend_posit
|
|
plot.background = element_rect(colour = NA),
|
|
plot.background = element_rect(colour = NA),
|
|
panel.border = element_rect(colour = NA),
|
|
panel.border = element_rect(colour = NA),
|
|
axis.title = element_text(face = "bold", size = rel(1)),
|
|
axis.title = element_text(face = "bold", size = rel(1)),
|
|
- axis.title.y = element_text(angle = 90, vjust = 2),
|
|
|
|
- axis.title.x = element_text(vjust = -0.2),
|
|
|
|
|
|
+ axis.title.y = element_text(angle = 90, vjust = 2, size = 18),
|
|
|
|
+ axis.title.x = element_text(vjust = -0.2, size = 18),
|
|
axis.line = element_line(colour = "black"),
|
|
axis.line = element_line(colour = "black"),
|
|
|
|
+ axis.text.x = element_text(size = 16),
|
|
|
|
+ axis.text.y = element_text(size = 16),
|
|
panel.grid.major = element_line(colour = "#f0f0f0"),
|
|
panel.grid.major = element_line(colour = "#f0f0f0"),
|
|
panel.grid.minor = element_blank(),
|
|
panel.grid.minor = element_blank(),
|
|
legend.key = element_rect(colour = NA),
|
|
legend.key = element_rect(colour = NA),
|
|
@@ -111,27 +115,42 @@ scale_colour_publication <- function(...) {
|
|
|
|
|
|
# Load the initial dataframe from the easy_results_file
|
|
# Load the initial dataframe from the easy_results_file
|
|
load_and_process_data <- function(easy_results_file, sd = 3) {
|
|
load_and_process_data <- function(easy_results_file, sd = 3) {
|
|
- df <- read.delim(easy_results_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE)
|
|
|
|
-
|
|
|
|
|
|
+ df <- read.delim(
|
|
|
|
+ easy_results_file,
|
|
|
|
+ skip = 2,
|
|
|
|
+ stringsAsFactors = FALSE,
|
|
|
|
+ row.names = 1,
|
|
|
|
+ strip.white = TRUE
|
|
|
|
+ )
|
|
|
|
+
|
|
|
|
+ # Filter and rename columns
|
|
|
|
+ df <- df %>%
|
|
|
|
+ filter(!is.na(ORF) & ORF != "") %>%
|
|
|
|
+ filter(!Gene %in% c("BLANK", "Blank", "blank")) %>%
|
|
|
|
+ filter(Drug != "BMH21") %>%
|
|
|
|
+ rename(
|
|
|
|
+ L = l,
|
|
|
|
+ num = Num.,
|
|
|
|
+ AUC = AUC96,
|
|
|
|
+ scan = Scan,
|
|
|
|
+ last_bg = LstBackgrd,
|
|
|
|
+ first_bg = X1stBackgrd
|
|
|
|
+ ) %>%
|
|
|
|
+ mutate(across(c(Col, Row, num, L, K, r, scan, AUC, last_bg, first_bg), as.numeric))
|
|
|
|
+
|
|
|
|
+ # Calculate delta background and tolerance
|
|
df <- df %>%
|
|
df <- df %>%
|
|
- filter(!(.[[1]] %in% c("", "Scan"))) %>%
|
|
|
|
- filter(!is.na(ORF) & ORF != "" & !Gene %in% c("BLANK", "Blank", "blank") & Drug != "BMH21") %>%
|
|
|
|
- # Rename columns
|
|
|
|
- rename(L = l, num = Num., AUC = AUC96, scan = Scan, last_bg = LstBackgrd, first_bg = X1stBackgrd) %>%
|
|
|
|
mutate(
|
|
mutate(
|
|
- across(c(Col, Row, num, L, K, r, scan, AUC, last_bg, first_bg), as.numeric),
|
|
|
|
delta_bg = last_bg - first_bg,
|
|
delta_bg = last_bg - first_bg,
|
|
delta_bg_tolerance = mean(delta_bg, na.rm = TRUE) + (sd * sd(delta_bg, na.rm = TRUE)),
|
|
delta_bg_tolerance = mean(delta_bg, na.rm = TRUE) + (sd * sd(delta_bg, na.rm = TRUE)),
|
|
NG = if_else(L == 0 & !is.na(L), 1, 0),
|
|
NG = if_else(L == 0 & !is.na(L), 1, 0),
|
|
DB = if_else(delta_bg >= delta_bg_tolerance, 1, 0),
|
|
DB = if_else(delta_bg >= delta_bg_tolerance, 1, 0),
|
|
SM = 0,
|
|
SM = 0,
|
|
- OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep), # should these be hardcoded?
|
|
|
|
|
|
+ OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep),
|
|
conc_num = as.numeric(gsub("[^0-9\\.]", "", Conc)),
|
|
conc_num = as.numeric(gsub("[^0-9\\.]", "", Conc)),
|
|
conc_num_factor = as.factor(conc_num)
|
|
conc_num_factor = as.factor(conc_num)
|
|
- # conc_num_factor = factor(conc_num, levels = sort(unique(conc_num)))
|
|
|
|
- # conc_num_numeric = as.numeric(conc_num_factor) - 1
|
|
|
|
)
|
|
)
|
|
-
|
|
|
|
|
|
+
|
|
return(df)
|
|
return(df)
|
|
}
|
|
}
|
|
|
|
|
|
@@ -439,34 +458,43 @@ generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_la
|
|
dev.off()
|
|
dev.off()
|
|
|
|
|
|
# Combine and save interactive HTML plots
|
|
# Combine and save interactive HTML plots
|
|
- combined_plot <- subplot(plotly_plots, nrows = grid_layout$nrow %||% length(plotly_plots), margin = 0.05)
|
|
|
|
|
|
+ combined_plot <- subplot(plotly_plots,
|
|
|
|
+ nrows = ifelse(is.null(grid_layout$nrow), length(plotly_plots), grid_layout$nrow),
|
|
|
|
+ margin = 0.05)
|
|
saveWidget(combined_plot, file = file.path(output_dir, paste0(file_name, ".html")), selfcontained = TRUE)
|
|
saveWidget(combined_plot, file = file.path(output_dir, paste0(file_name, ".html")), selfcontained = TRUE)
|
|
}
|
|
}
|
|
|
|
|
|
generate_scatter_plot <- function(plot, config) {
|
|
generate_scatter_plot <- function(plot, config) {
|
|
|
|
|
|
- # 1. Determine Shape, Size, and Position for geom_point
|
|
|
|
- shape <- if (!is.null(config$shape)) config$shape else 3
|
|
|
|
- size <- if (!is.null(config$size)) {
|
|
|
|
- config$size
|
|
|
|
- } else {
|
|
|
|
- if (!is.null(config$delta_bg_point) && config$delta_bg_point) 0.2
|
|
|
|
- else if (!is.null(config$gene_point) && config$gene_point) 0.2
|
|
|
|
- else 0.1
|
|
|
|
|
|
+ # Build the aes mapping with color if specified
|
|
|
|
+ if (!is.null(config$color_var)) {
|
|
|
|
+ plot <- plot + aes(color = .data[[config$color_var]])
|
|
}
|
|
}
|
|
|
|
|
|
- position <- if (!is.null(config$delta_bg_point) && config$delta_bg_point) {
|
|
|
|
- "identity"
|
|
|
|
- } else if (!is.null(config$gene_point) && config$gene_point) {
|
|
|
|
- "jitter"
|
|
|
|
- } else {
|
|
|
|
- if (!is.null(config$position) && config$position == "jitter") "jitter" else "identity"
|
|
|
|
- }
|
|
|
|
|
|
+ # Determine Shape, Size, and Position for geom_point
|
|
|
|
+ shape <- if (!is.null(config$shape)) config$shape else 3
|
|
|
|
+ size <- if (!is.null(config$size)) config$size else 0.1
|
|
|
|
+ position <- if (!is.null(config$position) && config$position == "jitter") "jitter" else "identity"
|
|
|
|
|
|
- # 2. Add geom_point with determined parameters
|
|
|
|
- plot <- plot + geom_point(shape = shape, size = size, position = position)
|
|
|
|
|
|
+ # Add geom_point with determined parameters
|
|
|
|
+ plot <- plot + geom_point(
|
|
|
|
+ aes(color = .data[[config$color_var]]),
|
|
|
|
+ shape = shape,
|
|
|
|
+ size = size,
|
|
|
|
+ position = position
|
|
|
|
+ )
|
|
|
|
+
|
|
|
|
+ if (!is.null(config$cyan_points)) {
|
|
|
|
+ plot <- plot + geom_point(
|
|
|
|
+ data = subset(config$df, is_cyan_point == TRUE),
|
|
|
|
+ aes(x = .data[[config$x_var]], y = .data[[config$y_var]]),
|
|
|
|
+ color = "cyan",
|
|
|
|
+ shape = 3,
|
|
|
|
+ size = 0.5
|
|
|
|
+ )
|
|
|
|
+ }
|
|
|
|
|
|
- # 3. Add Smooth Line if specified
|
|
|
|
|
|
+ # Add Smooth Line if specified
|
|
if (!is.null(config$add_smooth) && config$add_smooth) {
|
|
if (!is.null(config$add_smooth) && config$add_smooth) {
|
|
if (!is.null(config$lm_line)) {
|
|
if (!is.null(config$lm_line)) {
|
|
plot <- plot +
|
|
plot <- plot +
|
|
@@ -485,7 +513,7 @@ generate_scatter_plot <- function(plot, config) {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
- # 4. Add SD Bands if specified
|
|
|
|
|
|
+ # Add SD Bands if specified
|
|
if (!is.null(config$sd_band_values)) {
|
|
if (!is.null(config$sd_band_values)) {
|
|
for (sd_band in config$sd_band_values) {
|
|
for (sd_band in config$sd_band_values) {
|
|
plot <- plot +
|
|
plot <- plot +
|
|
@@ -509,8 +537,24 @@ generate_scatter_plot <- function(plot, config) {
|
|
)
|
|
)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
+
|
|
|
|
+ # Add Rectangles if specified
|
|
|
|
+ if (!is.null(config$rectangles)) {
|
|
|
|
+ for (rect in config$rectangles) {
|
|
|
|
+ plot <- plot + annotate(
|
|
|
|
+ "rect",
|
|
|
|
+ xmin = rect$xmin,
|
|
|
|
+ xmax = rect$xmax,
|
|
|
|
+ ymin = rect$ymin,
|
|
|
|
+ ymax = rect$ymax,
|
|
|
|
+ fill = ifelse(is.null(rect$fill), NA, rect$fill),
|
|
|
|
+ color = ifelse(is.null(rect$color), "black", rect$color),
|
|
|
|
+ alpha = ifelse(is.null(rect$alpha), 0.1, rect$alpha)
|
|
|
|
+ )
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
|
|
- # 5. Add Error Bars if specified
|
|
|
|
|
|
+ # Add Error Bars if specified
|
|
if (!is.null(config$error_bar) && config$error_bar && !is.null(config$y_var)) {
|
|
if (!is.null(config$error_bar) && config$error_bar && !is.null(config$y_var)) {
|
|
y_mean_col <- paste0("mean_", config$y_var)
|
|
y_mean_col <- paste0("mean_", config$y_var)
|
|
y_sd_col <- paste0("sd_", config$y_var)
|
|
y_sd_col <- paste0("sd_", config$y_var)
|
|
@@ -525,7 +569,7 @@ generate_scatter_plot <- function(plot, config) {
|
|
)
|
|
)
|
|
}
|
|
}
|
|
|
|
|
|
- # 6. Customize X-axis if specified
|
|
|
|
|
|
+ # Customize X-axis if specified
|
|
if (!is.null(config$x_breaks) && !is.null(config$x_labels) && !is.null(config$x_label)) {
|
|
if (!is.null(config$x_breaks) && !is.null(config$x_labels) && !is.null(config$x_label)) {
|
|
plot <- plot +
|
|
plot <- plot +
|
|
scale_x_discrete(
|
|
scale_x_discrete(
|
|
@@ -535,17 +579,17 @@ generate_scatter_plot <- function(plot, config) {
|
|
)
|
|
)
|
|
}
|
|
}
|
|
|
|
|
|
- # 7. Apply coord_cartesian if specified
|
|
|
|
|
|
+ # Apply coord_cartesian if specified
|
|
if (!is.null(config$coord_cartesian)) {
|
|
if (!is.null(config$coord_cartesian)) {
|
|
plot <- plot + coord_cartesian(ylim = config$coord_cartesian)
|
|
plot <- plot + coord_cartesian(ylim = config$coord_cartesian)
|
|
}
|
|
}
|
|
|
|
|
|
- # 8. Set Y-axis limits if specified
|
|
|
|
|
|
+ # Set Y-axis limits if specified
|
|
if (!is.null(config$ylim_vals)) {
|
|
if (!is.null(config$ylim_vals)) {
|
|
plot <- plot + scale_y_continuous(limits = config$ylim_vals)
|
|
plot <- plot + scale_y_continuous(limits = config$ylim_vals)
|
|
}
|
|
}
|
|
|
|
|
|
- # 9. Add Annotations if specified
|
|
|
|
|
|
+ # Add Annotations if specified
|
|
if (!is.null(config$annotations)) {
|
|
if (!is.null(config$annotations)) {
|
|
for (annotation in config$annotations) {
|
|
for (annotation in config$annotations) {
|
|
plot <- plot +
|
|
plot <- plot +
|
|
@@ -559,12 +603,12 @@ generate_scatter_plot <- function(plot, config) {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
- # 10. Add Title if specified
|
|
|
|
|
|
+ # Add Title if specified
|
|
if (!is.null(config$title)) {
|
|
if (!is.null(config$title)) {
|
|
plot <- plot + ggtitle(config$title)
|
|
plot <- plot + ggtitle(config$title)
|
|
}
|
|
}
|
|
|
|
|
|
- # 11. Adjust Legend Position if specified
|
|
|
|
|
|
+ # Adjust Legend Position if specified
|
|
if (!is.null(config$legend_position)) {
|
|
if (!is.null(config$legend_position)) {
|
|
plot <- plot + theme(legend.position = config$legend_position)
|
|
plot <- plot + theme(legend.position = config$legend_position)
|
|
}
|
|
}
|
|
@@ -692,121 +736,131 @@ generate_interaction_plot_configs <- function(df, variables) {
|
|
return(configs)
|
|
return(configs)
|
|
}
|
|
}
|
|
|
|
|
|
-generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE, adjust = FALSE) {
|
|
|
|
-
|
|
|
|
- # Define SD bands
|
|
|
|
- sd_bands <- c(1, 2, 3)
|
|
|
|
-
|
|
|
|
- # Initialize list to store plot configurations
|
|
|
|
- configs <- list()
|
|
|
|
-
|
|
|
|
- # SD-based plots for L and K
|
|
|
|
- for (variable in c("L", "K")) {
|
|
|
|
- for (sd_band in sd_bands) {
|
|
|
|
-
|
|
|
|
- # Determine columns based on whether it's lm or not
|
|
|
|
- if (is_lm) {
|
|
|
|
- rank_var <- paste0(variable, "_Rank_lm")
|
|
|
|
- zscore_var <- paste0("Z_lm_", variable)
|
|
|
|
- y_label <- paste("Int Z score", variable)
|
|
|
|
- } else {
|
|
|
|
- rank_var <- paste0(variable, "_Rank")
|
|
|
|
- zscore_var <- paste0("Avg_Zscore_", variable)
|
|
|
|
- y_label <- paste("Avg Z score", variable)
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- # Annotated Plot Configuration
|
|
|
|
- configs[[length(configs) + 1]] <- list(
|
|
|
|
- df = df_filtered,
|
|
|
|
- x_var = rank_var,
|
|
|
|
- y_var = zscore_var,
|
|
|
|
- plot_type = "scatter",
|
|
|
|
- title = paste(y_label, "vs. Rank for", variable, "above", sd_band, "SD"),
|
|
|
|
- sd_band = sd_band,
|
|
|
|
- annotations = list(
|
|
|
|
- list(
|
|
|
|
- x = median(df_filtered[[rank_var]], na.rm = TRUE),
|
|
|
|
- y = 10,
|
|
|
|
- label = paste("Deletion Enhancers =", sum(df_filtered[[zscore_var]] >= sd_band, na.rm = TRUE))
|
|
|
|
- ),
|
|
|
|
- list(
|
|
|
|
- x = median(df_filtered[[rank_var]], na.rm = TRUE),
|
|
|
|
- y = -10,
|
|
|
|
- label = paste("Deletion Suppressors =", sum(df_filtered[[zscore_var]] <= -sd_band, na.rm = TRUE))
|
|
|
|
- )
|
|
|
|
- ),
|
|
|
|
- sd_band_values = sd_band,
|
|
|
|
- shape = 3,
|
|
|
|
- size = 0.1
|
|
|
|
- )
|
|
|
|
-
|
|
|
|
- # Non-Annotated Plot Configuration
|
|
|
|
- configs[[length(configs) + 1]] <- list(
|
|
|
|
- df = df_filtered,
|
|
|
|
- x_var = rank_var,
|
|
|
|
- y_var = zscore_var,
|
|
|
|
- plot_type = "scatter",
|
|
|
|
- title = paste(y_label, "vs. Rank for", variable, "above", sd_band, "SD No Annotations"),
|
|
|
|
- sd_band = sd_band,
|
|
|
|
- annotations = NULL,
|
|
|
|
- sd_band_values = sd_band,
|
|
|
|
- shape = 3,
|
|
|
|
- size = 0.1
|
|
|
|
- )
|
|
|
|
|
|
+generate_rank_plot_configs <- function(df_filtered, variables, is_lm = FALSE) {
|
|
|
|
+ # Define SD bands
|
|
|
|
+ sd_bands <- c(1, 2, 3)
|
|
|
|
+
|
|
|
|
+ # Initialize list to store plot configurations
|
|
|
|
+ configs <- list()
|
|
|
|
+
|
|
|
|
+ # SD-based plots for L and K
|
|
|
|
+ for (variable in c("L", "K")) {
|
|
|
|
+ for (sd_band in sd_bands) {
|
|
|
|
+ # Determine columns based on whether it's lm or not
|
|
|
|
+ if (is_lm) {
|
|
|
|
+ rank_var <- paste0(variable, "_Rank_lm")
|
|
|
|
+ zscore_var <- paste0("Z_lm_", variable)
|
|
|
|
+ y_label <- paste("Int Z score", variable)
|
|
|
|
+ } else {
|
|
|
|
+ rank_var <- paste0(variable, "_Rank")
|
|
|
|
+ zscore_var <- paste0("Avg_Zscore_", variable)
|
|
|
|
+ y_label <- paste("Avg Z score", variable)
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ # Calculate counts for annotations
|
|
|
|
+ num_enhancers <- sum(df_filtered[[zscore_var]] >= sd_band, na.rm = TRUE)
|
|
|
|
+ num_suppressors <- sum(df_filtered[[zscore_var]] <= -sd_band, na.rm = TRUE)
|
|
|
|
+
|
|
|
|
+ # Annotated Plot Configuration
|
|
|
|
+ configs[[length(configs) + 1]] <- list(
|
|
|
|
+ df = df_filtered,
|
|
|
|
+ x_var = rank_var,
|
|
|
|
+ y_var = zscore_var,
|
|
|
|
+ plot_type = "scatter",
|
|
|
|
+ title = paste(y_label, "vs. Rank for", variable, "above", sd_band, "SD"),
|
|
|
|
+ sd_band = sd_band,
|
|
|
|
+ annotations = list(
|
|
|
|
+ list(
|
|
|
|
+ x = median(df_filtered[[rank_var]], na.rm = TRUE),
|
|
|
|
+ y = 10,
|
|
|
|
+ label = paste("Deletion Enhancers =", num_enhancers)
|
|
|
|
+ ),
|
|
|
|
+ list(
|
|
|
|
+ x = median(df_filtered[[rank_var]], na.rm = TRUE),
|
|
|
|
+ y = -10,
|
|
|
|
+ label = paste("Deletion Suppressors =", num_suppressors)
|
|
|
|
+ )
|
|
|
|
+ ),
|
|
|
|
+ sd_band_values = sd_band,
|
|
|
|
+ shape = 3,
|
|
|
|
+ size = 0.1,
|
|
|
|
+ y_label = y_label,
|
|
|
|
+ x_label = "Rank",
|
|
|
|
+ legend_position = "none"
|
|
|
|
+ )
|
|
|
|
+
|
|
|
|
+ # Non-Annotated Plot Configuration
|
|
|
|
+ configs[[length(configs) + 1]] <- list(
|
|
|
|
+ df = df_filtered,
|
|
|
|
+ x_var = rank_var,
|
|
|
|
+ y_var = zscore_var,
|
|
|
|
+ plot_type = "scatter",
|
|
|
|
+ title = paste(y_label, "vs. Rank for", variable, "above", sd_band, "SD No Annotations"),
|
|
|
|
+ sd_band = sd_band,
|
|
|
|
+ annotations = NULL,
|
|
|
|
+ sd_band_values = sd_band,
|
|
|
|
+ shape = 3,
|
|
|
|
+ size = 0.1,
|
|
|
|
+ y_label = y_label,
|
|
|
|
+ x_label = "Rank",
|
|
|
|
+ legend_position = "none"
|
|
|
|
+ )
|
|
|
|
+ }
|
|
}
|
|
}
|
|
- }
|
|
|
|
-
|
|
|
|
- # Avg ZScore and Rank Avg ZScore Plots for r, L, K, and AUC
|
|
|
|
- for (variable in variables) {
|
|
|
|
- for (plot_type in c("Avg_Zscore_vs_lm", "Rank_Avg_Zscore_vs_lm")) {
|
|
|
|
-
|
|
|
|
- # Define x and y variables based on plot type
|
|
|
|
- if (plot_type == "Avg_Zscore_vs_lm") {
|
|
|
|
- x_var <- paste0("Avg_Zscore_", variable)
|
|
|
|
- y_var <- paste0("Z_lm_", variable)
|
|
|
|
- title_suffix <- paste("Avg Zscore vs lm", variable)
|
|
|
|
- } else if (plot_type == "Rank_Avg_Zscore_vs_lm") {
|
|
|
|
- x_var <- paste0(variable, "_Rank")
|
|
|
|
- y_var <- paste0(variable, "_Rank_lm")
|
|
|
|
- title_suffix <- paste("Rank Avg Zscore vs lm", variable)
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- # Determine y-axis label
|
|
|
|
- if (plot_type == "Avg_Zscore_vs_lm") {
|
|
|
|
- y_label <- paste("Z lm", variable)
|
|
|
|
- } else {
|
|
|
|
- y_label <- paste("Rank lm", variable)
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- # Determine correlation text (R-squared)
|
|
|
|
- lm_fit <- lm(df_filtered[[y_var]] ~ df_filtered[[x_var]], data = df_filtered)
|
|
|
|
- r_squared <- summary(lm_fit)$r.squared
|
|
|
|
-
|
|
|
|
- # Plot Configuration
|
|
|
|
- configs[[length(configs) + 1]] <- list(
|
|
|
|
- df = df_filtered,
|
|
|
|
- x_var = x_var,
|
|
|
|
- y_var = y_var,
|
|
|
|
- plot_type = "scatter",
|
|
|
|
- title = title_suffix,
|
|
|
|
- annotations = list(
|
|
|
|
- list(
|
|
|
|
- x = 0,
|
|
|
|
- y = 0,
|
|
|
|
- label = paste("R-squared =", round(r_squared, 2))
|
|
|
|
- )
|
|
|
|
- ),
|
|
|
|
- sd_band_values = NULL, # Not applicable
|
|
|
|
- shape = 3,
|
|
|
|
- size = 0.1,
|
|
|
|
- add_smooth = TRUE,
|
|
|
|
- lm_line = list(intercept = coef(lm_fit)[1], slope = coef(lm_fit)[2]),
|
|
|
|
- legend_position = "right"
|
|
|
|
- )
|
|
|
|
|
|
+
|
|
|
|
+ # Avg ZScore and Rank Avg ZScore Plots for r, L, K, and AUC
|
|
|
|
+ for (variable in variables) {
|
|
|
|
+ for (plot_type in c("Avg_Zscore_vs_lm", "Rank_Avg_Zscore_vs_lm")) {
|
|
|
|
+ # Define x and y variables based on plot type
|
|
|
|
+ if (plot_type == "Avg_Zscore_vs_lm") {
|
|
|
|
+ x_var <- paste0("Avg_Zscore_", variable)
|
|
|
|
+ y_var <- paste0("Z_lm_", variable)
|
|
|
|
+ title_suffix <- paste("Avg Zscore vs lm", variable)
|
|
|
|
+ } else {
|
|
|
|
+ x_var <- paste0(variable, "_Rank")
|
|
|
|
+ y_var <- paste0(variable, "_Rank_lm")
|
|
|
|
+ title_suffix <- paste("Rank Avg Zscore vs lm", variable)
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ # Fit linear model
|
|
|
|
+ lm_fit <- lm(df_filtered[[y_var]] ~ df_filtered[[x_var]], data = df_filtered)
|
|
|
|
+
|
|
|
|
+ # Check for perfect fit
|
|
|
|
+ if (summary(lm_fit)$sigma == 0) {
|
|
|
|
+ next # Skip this iteration if the fit is perfect
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ # Calculate R-squared
|
|
|
|
+ r_squared <- summary(lm_fit)$r.squared
|
|
|
|
+
|
|
|
|
+ # Plot Configuration
|
|
|
|
+ configs[[length(configs) + 1]] <- list(
|
|
|
|
+ df = df_filtered,
|
|
|
|
+ x_var = x_var,
|
|
|
|
+ y_var = y_var,
|
|
|
|
+ plot_type = "scatter",
|
|
|
|
+ title = title_suffix,
|
|
|
|
+ annotations = list(
|
|
|
|
+ list(
|
|
|
|
+ x = 0,
|
|
|
|
+ y = 0,
|
|
|
|
+ label = paste("R-squared =", round(r_squared, 2))
|
|
|
|
+ )
|
|
|
|
+ ),
|
|
|
|
+ sd_band_values = NULL, # Not applicable
|
|
|
|
+ shape = 3,
|
|
|
|
+ size = 0.1,
|
|
|
|
+ add_smooth = TRUE,
|
|
|
|
+ lm_line = list(intercept = coef(lm_fit)[1], slope = coef(lm_fit)[2]),
|
|
|
|
+ legend_position = "right",
|
|
|
|
+ color_var = "Overlap",
|
|
|
|
+ x_label = x_var,
|
|
|
|
+ y_label = y_var
|
|
|
|
+ )
|
|
|
|
+ }
|
|
}
|
|
}
|
|
- }
|
|
|
|
-
|
|
|
|
- return(configs)
|
|
|
|
|
|
+
|
|
|
|
+ return(configs)
|
|
}
|
|
}
|
|
|
|
|
|
generate_correlation_plot_configs <- function(df) {
|
|
generate_correlation_plot_configs <- function(df) {
|
|
@@ -831,7 +885,7 @@ generate_correlation_plot_configs <- function(df) {
|
|
config <- list(
|
|
config <- list(
|
|
df = df,
|
|
df = df,
|
|
x_var = rel$x,
|
|
x_var = rel$x,
|
|
- y_var = rel$y,
|
|
|
|
|
|
+ y_var = rel.y,
|
|
plot_type = "scatter",
|
|
plot_type = "scatter",
|
|
title = rel$label,
|
|
title = rel$label,
|
|
x_label = paste("z-score", gsub("Z_lm_", "", rel$x)),
|
|
x_label = paste("z-score", gsub("Z_lm_", "", rel$x)),
|
|
@@ -839,9 +893,19 @@ generate_correlation_plot_configs <- function(df) {
|
|
annotations = list(
|
|
annotations = list(
|
|
list(x = 0, y = 0, label = paste("R-squared =", round(lm_summary$r.squared, 3)))
|
|
list(x = 0, y = 0, label = paste("R-squared =", round(lm_summary$r.squared, 3)))
|
|
),
|
|
),
|
|
- add_smooth = TRUE, # This flags that a geom_smooth layer should be added
|
|
|
|
- lm_line = list(intercept = coef(lm_model)[1], slope = coef(lm_model)[2]), # For direct geom_abline if needed
|
|
|
|
- legend_position = "right"
|
|
|
|
|
|
+ add_smooth = TRUE, # Add regression line
|
|
|
|
+ lm_line = list(intercept = coef(lm_model)[1], slope = coef(lm_model)[2]),
|
|
|
|
+ legend_position = "right",
|
|
|
|
+ shape = 3,
|
|
|
|
+ size = 0.5,
|
|
|
|
+ color_var = "Overlap",
|
|
|
|
+ rectangles = list(
|
|
|
|
+ list(
|
|
|
|
+ xmin = -2, xmax = 2, ymin = -2, ymax = 2,
|
|
|
|
+ fill = NA, color = "grey20", alpha = 0.1
|
|
|
|
+ )
|
|
|
|
+ ),
|
|
|
|
+ cyan_points = TRUE
|
|
)
|
|
)
|
|
|
|
|
|
configs[[length(configs) + 1]] <- config
|
|
configs[[length(configs) + 1]] <- config
|
|
@@ -850,6 +914,7 @@ generate_correlation_plot_configs <- function(df) {
|
|
return(configs)
|
|
return(configs)
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+
|
|
filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FALSE,
|
|
filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FALSE,
|
|
rank = FALSE, limits_map = NULL, verbose = TRUE) {
|
|
rank = FALSE, limits_map = NULL, verbose = TRUE) {
|
|
|
|
|
|
@@ -1188,19 +1253,16 @@ main <- function() {
|
|
)
|
|
)
|
|
)
|
|
)
|
|
|
|
|
|
- # message("Generating quality control plots")
|
|
|
|
- # generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots)
|
|
|
|
- # generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots)
|
|
|
|
- # generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots)
|
|
|
|
- # generate_and_save_plots(out_dir_qc, "plate_analysis", plate_analysis_plots)
|
|
|
|
- # generate_and_save_plots(out_dir_qc, "plate_analysis_boxplots", plate_analysis_boxplots)
|
|
|
|
- # generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros", plate_analysis_no_zeros_plots)
|
|
|
|
- # generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros_boxplots", plate_analysis_no_zeros_boxplots)
|
|
|
|
- # generate_and_save_plots(out_dir_qc, "L_vs_K_for_strains_2SD_outside_mean_K", l_outside_2sd_k_plots)
|
|
|
|
- # generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots)
|
|
|
|
-
|
|
|
|
- # TODO: Originally this filtered L NA's
|
|
|
|
- # Let's try to avoid for now since stats have already been calculated
|
|
|
|
|
|
+ message("Generating quality control plots")
|
|
|
|
+ generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots)
|
|
|
|
+ generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots)
|
|
|
|
+ generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots)
|
|
|
|
+ generate_and_save_plots(out_dir_qc, "plate_analysis", plate_analysis_plots)
|
|
|
|
+ generate_and_save_plots(out_dir_qc, "plate_analysis_boxplots", plate_analysis_boxplots)
|
|
|
|
+ generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros", plate_analysis_no_zeros_plots)
|
|
|
|
+ generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros_boxplots", plate_analysis_no_zeros_boxplots)
|
|
|
|
+ generate_and_save_plots(out_dir_qc, "L_vs_K_for_strains_2SD_outside_mean_K", l_outside_2sd_k_plots)
|
|
|
|
+ generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots)
|
|
|
|
|
|
# Process background strains
|
|
# Process background strains
|
|
bg_strains <- c("YDL227C")
|
|
bg_strains <- c("YDL227C")
|
|
@@ -1349,8 +1411,7 @@ main <- function() {
|
|
rank_plot_configs <- generate_rank_plot_configs(
|
|
rank_plot_configs <- generate_rank_plot_configs(
|
|
df = zscores_interactions_joined_filtered,
|
|
df = zscores_interactions_joined_filtered,
|
|
variables = interaction_vars,
|
|
variables = interaction_vars,
|
|
- is_lm = FALSE,
|
|
|
|
- adjust = TRUE
|
|
|
|
|
|
+ is_lm = FALSE
|
|
)
|
|
)
|
|
generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots",
|
|
generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots",
|
|
plot_configs = rank_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
|
|
plot_configs = rank_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
|
|
@@ -1359,8 +1420,7 @@ main <- function() {
|
|
rank_lm_plot_configs <- generate_rank_plot_configs(
|
|
rank_lm_plot_configs <- generate_rank_plot_configs(
|
|
df = zscores_interactions_joined_filtered,
|
|
df = zscores_interactions_joined_filtered,
|
|
variables = interaction_vars,
|
|
variables = interaction_vars,
|
|
- is_lm = TRUE,
|
|
|
|
- adjust = TRUE
|
|
|
|
|
|
+ is_lm = TRUE
|
|
)
|
|
)
|
|
generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots_lm",
|
|
generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots_lm",
|
|
plot_configs = rank_lm_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
|
|
plot_configs = rank_lm_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
|
|
@@ -1370,8 +1430,6 @@ main <- function() {
|
|
zscores_interactions_filtered <- zscores_interactions_joined %>%
|
|
zscores_interactions_filtered <- zscores_interactions_joined %>%
|
|
group_by(across(all_of(c("OrfRep", "Gene", "num")))) %>%
|
|
group_by(across(all_of(c("OrfRep", "Gene", "num")))) %>%
|
|
filter(!is.na(Z_lm_L) | !is.na(Avg_Zscore_L)) %>%
|
|
filter(!is.na(Z_lm_L) | !is.na(Avg_Zscore_L)) %>%
|
|
- ungroup() %>%
|
|
|
|
- rowwise() %>%
|
|
|
|
mutate(
|
|
mutate(
|
|
lm_R_squared_L = if (n() > 1) summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared else NA,
|
|
lm_R_squared_L = if (n() > 1) summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared else NA,
|
|
lm_R_squared_K = if (n() > 1) summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared else NA,
|
|
lm_R_squared_K = if (n() > 1) summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared else NA,
|
|
@@ -1387,8 +1445,7 @@ main <- function() {
|
|
Z_lm_L <= -2 & Avg_Zscore_L >= 2 ~ "Deletion Suppressor lm, Deletion Enhancer Avg Z score",
|
|
Z_lm_L <= -2 & Avg_Zscore_L >= 2 ~ "Deletion Suppressor lm, Deletion Enhancer Avg Z score",
|
|
TRUE ~ "No Effect"
|
|
TRUE ~ "No Effect"
|
|
)
|
|
)
|
|
- ) %>%
|
|
|
|
- ungroup()
|
|
|
|
|
|
+ )
|
|
|
|
|
|
# Re-rank
|
|
# Re-rank
|
|
zscores_interactions_filtered <- filter_data(
|
|
zscores_interactions_filtered <- filter_data(
|
|
@@ -1401,8 +1458,7 @@ main <- function() {
|
|
rank_plot_filtered_configs <- generate_rank_plot_configs(
|
|
rank_plot_filtered_configs <- generate_rank_plot_configs(
|
|
df = zscores_interactions_filtered,
|
|
df = zscores_interactions_filtered,
|
|
variables = interaction_vars,
|
|
variables = interaction_vars,
|
|
- is_lm = FALSE,
|
|
|
|
- adjust = FALSE
|
|
|
|
|
|
+ is_lm = FALSE
|
|
)
|
|
)
|
|
|
|
|
|
message("Generating filtered ranked plots")
|
|
message("Generating filtered ranked plots")
|
|
@@ -1416,8 +1472,7 @@ main <- function() {
|
|
rank_plot_lm_filtered_configs <- generate_rank_plot_configs(
|
|
rank_plot_lm_filtered_configs <- generate_rank_plot_configs(
|
|
df = zscores_interactions_filtered,
|
|
df = zscores_interactions_filtered,
|
|
variables = interaction_vars,
|
|
variables = interaction_vars,
|
|
- is_lm = TRUE,
|
|
|
|
- adjust = FALSE
|
|
|
|
|
|
+ is_lm = TRUE
|
|
)
|
|
)
|
|
generate_and_save_plots(
|
|
generate_and_save_plots(
|
|
output_dir = out_dir,
|
|
output_dir = out_dir,
|