Simplify plot config format
This commit is contained in:
@@ -112,7 +112,7 @@ theme_publication <- function(base_size = 14, base_family = "sans", legend_posit
|
|||||||
legend.spacing = unit(0, "cm"),
|
legend.spacing = unit(0, "cm"),
|
||||||
legend.title = element_text(face = "italic", size = rel(1.3)),
|
legend.title = element_text(face = "italic", size = rel(1.3)),
|
||||||
legend.text = element_text(size = rel(1.2)),
|
legend.text = element_text(size = rel(1.2)),
|
||||||
plot.margin = unit(c(10, 5, 5, 5), "mm"),
|
plot.margin = unit(c(10, 5, 5, 5), "mm")
|
||||||
# strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
|
# strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
|
||||||
# strip.text = element_text(face = "bold")
|
# strip.text = element_text(face = "bold")
|
||||||
)
|
)
|
||||||
@@ -150,9 +150,8 @@ load_and_filter_data <- function(easy_results_file, sd = 3) {
|
|||||||
SM = 0,
|
SM = 0,
|
||||||
OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep), # should these be hardcoded?
|
OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep), # should these be hardcoded?
|
||||||
conc_num = as.numeric(gsub("[^0-9\\.]", "", Conc)),
|
conc_num = as.numeric(gsub("[^0-9\\.]", "", Conc)),
|
||||||
conc_num_factor_new = factor(conc_num),
|
conc_num_factor = as.numeric(as.factor(conc_num)) - 1, # for legacy purposes
|
||||||
conc_num_factor_zeroed = factor(as.numeric(conc_num_factor_new) - 1),
|
conc_num_factor_factor = factor(conc_num)
|
||||||
conc_num_factor = as.numeric(conc_num_factor_zeroed) # for legacy purposes
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return(df)
|
return(df)
|
||||||
@@ -186,12 +185,12 @@ calculate_summary_stats <- function(df, variables, group_vars) {
|
|||||||
N = n(),
|
N = n(),
|
||||||
across(all_of(variables),
|
across(all_of(variables),
|
||||||
list(
|
list(
|
||||||
mean = ~mean(., na.rm = TRUE),
|
mean = ~ mean(.x, na.rm = TRUE),
|
||||||
median = ~median(., na.rm = TRUE),
|
median = ~ median(.x, na.rm = TRUE),
|
||||||
max = ~ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)),
|
max = ~ ifelse(all(is.na(.x)), NA, max(.x, na.rm = TRUE)),
|
||||||
min = ~ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)),
|
min = ~ ifelse(all(is.na(.x)), NA, min(.x, na.rm = TRUE)),
|
||||||
sd = ~sd(., na.rm = TRUE),
|
sd = ~ sd(.x, na.rm = TRUE),
|
||||||
se = ~sd(., na.rm = TRUE) / sqrt(N - 1) # Bessel's correction
|
se = ~ sd(.x, na.rm = TRUE) / sqrt(n() - 1)
|
||||||
),
|
),
|
||||||
.names = "{.fn}_{.col}"
|
.names = "{.fn}_{.col}"
|
||||||
),
|
),
|
||||||
@@ -335,7 +334,8 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats,
|
|||||||
# Declare column order for output
|
# Declare column order for output
|
||||||
calculations <- calculations %>%
|
calculations <- calculations %>%
|
||||||
select(
|
select(
|
||||||
"OrfRep", "Gene", "num", "conc_num", "conc_num_factor", "N",
|
"OrfRep", "Gene", "num", "N",
|
||||||
|
"conc_num", "conc_num_factor", "conc_num_factor_factor",
|
||||||
"mean_L", "mean_K", "mean_r", "mean_AUC",
|
"mean_L", "mean_K", "mean_r", "mean_AUC",
|
||||||
"median_L", "median_K", "median_r", "median_AUC",
|
"median_L", "median_K", "median_r", "median_AUC",
|
||||||
"sd_L", "sd_K", "sd_r", "sd_AUC",
|
"sd_L", "sd_K", "sd_r", "sd_AUC",
|
||||||
@@ -352,7 +352,8 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats,
|
|||||||
|
|
||||||
interactions <- interactions %>%
|
interactions <- interactions %>%
|
||||||
select(
|
select(
|
||||||
"OrfRep", "Gene", "conc_num", "conc_num_factor", "num", "NG", "DB", "SM",
|
"OrfRep", "Gene", "num", "NG", "DB", "SM",
|
||||||
|
"conc_num", "conc_num_factor", "conc_num_factor_factor",
|
||||||
"Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
|
"Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
|
||||||
"Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
|
"Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
|
||||||
"Sum_Z_Score_L", "Sum_Z_Score_K", "Sum_Z_Score_r", "Sum_Z_Score_AUC",
|
"Sum_Z_Score_L", "Sum_Z_Score_K", "Sum_Z_Score_r", "Sum_Z_Score_AUC",
|
||||||
@@ -366,10 +367,11 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats,
|
|||||||
cleaned_df <- df %>%
|
cleaned_df <- df %>%
|
||||||
select(-any_of(
|
select(-any_of(
|
||||||
setdiff(intersect(names(df), names(calculations)),
|
setdiff(intersect(names(df), names(calculations)),
|
||||||
c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))))
|
c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor", "conc_num_factor_factor"))))
|
||||||
|
|
||||||
# Join the original dataframe with calculations
|
# Join the original dataframe with calculations
|
||||||
df_with_calculations <- left_join(cleaned_df, calculations, by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))
|
df_with_calculations <- left_join(cleaned_df, calculations,
|
||||||
|
by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor", "conc_num_factor_factor"))
|
||||||
|
|
||||||
# Remove overlapping columns between df_with_calculations and interactions
|
# Remove overlapping columns between df_with_calculations and interactions
|
||||||
df_with_calculations_clean <- df_with_calculations %>%
|
df_with_calculations_clean <- df_with_calculations %>%
|
||||||
@@ -378,7 +380,8 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats,
|
|||||||
c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))))
|
c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))))
|
||||||
|
|
||||||
# Join with interactions to create the full dataset
|
# Join with interactions to create the full dataset
|
||||||
full_data <- left_join(df_with_calculations_clean, interactions, by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))
|
full_data <- left_join(df_with_calculations_clean, interactions,
|
||||||
|
by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor", "conc_num_factor_factor"))
|
||||||
|
|
||||||
return(list(
|
return(list(
|
||||||
calculations = calculations,
|
calculations = calculations,
|
||||||
@@ -390,131 +393,67 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats,
|
|||||||
generate_and_save_plots <- function(out_dir, filename, plot_configs) {
|
generate_and_save_plots <- function(out_dir, filename, plot_configs) {
|
||||||
message("Generating ", filename, ".pdf and ", filename, ".html")
|
message("Generating ", filename, ".pdf and ", filename, ".html")
|
||||||
|
|
||||||
for (config_group in plot_configs) {
|
grid_nrow <- ifelse(is.null(plot_configs$grid_layout$nrow), 1, plot_configs$grid_layout$nrow)
|
||||||
plot_list <- config_group$plots
|
grid_ncol <- ifelse(is.null(plot_configs$grid_layout$ncol), length(plot_configs$plots), plot_configs$grid_layout$ncol)
|
||||||
grid_nrow <- config_group$grid_layout$nrow
|
|
||||||
grid_ncol <- config_group$grid_layout$ncol
|
|
||||||
|
|
||||||
# Set defaults if nrow or ncol are not provided
|
|
||||||
if (is.null(grid_nrow) || is.null(grid_ncol)) {
|
|
||||||
num_plots <- length(plot_list)
|
|
||||||
grid_nrow <- ifelse(is.null(grid_nrow), 1, grid_nrow)
|
|
||||||
grid_ncol <- ifelse(is.null(grid_ncol), num_plots, grid_ncol)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Prepare lists to collect static and interactive plots
|
|
||||||
static_plots <- list()
|
static_plots <- list()
|
||||||
plotly_plots <- list()
|
plotly_plots <- list()
|
||||||
|
|
||||||
# Generate each individual plot based on the configuration
|
for (i in seq_along(plot_configs$plots)) {
|
||||||
for (i in seq_along(plot_list)) {
|
config <- plot_configs$plots[[i]]
|
||||||
config <- plot_list[[i]]
|
|
||||||
df <- config$df
|
df <- config$df
|
||||||
|
|
||||||
# Create the base plot
|
# Define aes_mapping and handle color_var defaulting
|
||||||
aes_mapping <- if (config$plot_type == "bar") {
|
aes_mapping <- if (config$plot_type == "bar") {
|
||||||
if (!is.null(config$color_var)) {
|
aes(x = .data[[config$x_var]], fill = ifelse(!is.null(config$color_var), .data[[config$color_var]], "black"),
|
||||||
aes(x = .data[[config$x_var]], fill = as.factor(.data[[config$color_var]]), color = as.factor(.data[[config$color_var]]))
|
color = ifelse(!is.null(config$color_var), .data[[config$color_var]], "black"))
|
||||||
} else {
|
|
||||||
aes(x = .data[[config$x_var]])
|
|
||||||
}
|
|
||||||
} else if (config$plot_type == "density") {
|
} else if (config$plot_type == "density") {
|
||||||
if (!is.null(config$color_var)) {
|
aes(x = .data[[config$x_var]], color = ifelse(!is.null(config$color_var), .data[[config$color_var]], "black"))
|
||||||
aes(x = .data[[config$x_var]], color = as.factor(.data[[config$color_var]]))
|
|
||||||
} else {
|
} else {
|
||||||
aes(x = .data[[config$x_var]])
|
aes(x = .data[[config$x_var]], y = .data[[config$y_var]], color = ifelse(!is.null(config$color_var), .data[[config$color_var]], "black"))
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (!is.null(config$color_var)) {
|
|
||||||
aes(x = .data[[config$x_var]], y = .data[[config$y_var]], color = as.factor(.data[[config$color_var]]))
|
|
||||||
} else {
|
|
||||||
aes(x = .data[[config$x_var]], y = .data[[config$y_var]])
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
plot <- ggplot(df, aes_mapping)
|
# Apply theme_publication with legend_position
|
||||||
|
plot <- ggplot(df, aes_mapping) + theme_publication(legend_position = config$legend_position)
|
||||||
|
|
||||||
# Apply theme_publication with legend_position from config
|
# Apply appropriate plot function
|
||||||
legend_position <- if (!is.null(config$legend_position)) config$legend_position else "bottom"
|
|
||||||
plot <- plot + theme_publication(legend_position = legend_position)
|
|
||||||
|
|
||||||
# Use appropriate helper function based on plot type
|
|
||||||
plot <- switch(config$plot_type,
|
plot <- switch(config$plot_type,
|
||||||
"scatter" = generate_scatter_plot(plot, config),
|
"scatter" = generate_scatter_plot(plot, config),
|
||||||
"box" = generate_box_plot(plot, config),
|
"box" = generate_box_plot(plot, config),
|
||||||
"density" = plot + geom_density(),
|
"density" = plot + geom_density(),
|
||||||
"bar" = plot + geom_bar(),
|
"bar" = plot + geom_bar(),
|
||||||
plot # default case if no type matches
|
plot # default case
|
||||||
)
|
)
|
||||||
|
|
||||||
# Add title and labels
|
# Add titles and labels
|
||||||
if (!is.null(config$title)) {
|
if (!is.null(config$title)) plot <- plot + ggtitle(config$title)
|
||||||
plot <- plot + ggtitle(config$title)
|
if (!is.null(config$x_label)) plot <- plot + xlab(config$x_label)
|
||||||
}
|
if (!is.null(config$y_label)) plot <- plot + ylab(config$y_label)
|
||||||
if (!is.null(config$x_label)) {
|
if (!is.null(config$coord_cartesian)) plot <- plot + coord_cartesian(ylim = config$coord_cartesian)
|
||||||
plot <- plot + xlab(config$x_label)
|
|
||||||
}
|
|
||||||
if (!is.null(config$y_label)) {
|
|
||||||
plot <- plot + ylab(config$y_label)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Add cartesian coordinates if specified
|
|
||||||
if (!is.null(config$coord_cartesian)) {
|
|
||||||
plot <- plot + coord_cartesian(ylim = config$coord_cartesian)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Apply scale_color_discrete(guide = FALSE) when color_var is NULL
|
|
||||||
if (is.null(config$color_var)) {
|
|
||||||
plot <- plot + scale_color_discrete(guide = "none")
|
|
||||||
}
|
|
||||||
|
|
||||||
# Add interactive tooltips for plotly
|
# Add interactive tooltips for plotly
|
||||||
tooltip_vars <- c()
|
tooltip_vars <- ifelse(!is.null(config$tooltip_vars), config$tooltip_vars, "none")
|
||||||
if (config$plot_type == "scatter") {
|
plotly_plot <- suppressWarnings(plotly::ggplotly(plot, tooltip = tooltip_vars))
|
||||||
if (!is.null(config$delta_bg_point) && config$delta_bg_point) {
|
|
||||||
tooltip_vars <- c(tooltip_vars, "OrfRep", "Gene", "delta_bg")
|
|
||||||
} else if (!is.null(config$gene_point) && config$gene_point) {
|
|
||||||
tooltip_vars <- c(tooltip_vars, "OrfRep", "Gene")
|
|
||||||
} else if (!is.null(config$y_var) && !is.null(config$x_var)) {
|
|
||||||
tooltip_vars <- c(config$x_var, config$y_var)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Convert to plotly object and suppress warnings here
|
# Adjust legend position in plotly
|
||||||
plotly_plot <- suppressWarnings({
|
|
||||||
if (length(tooltip_vars) > 0) {
|
|
||||||
plotly::ggplotly(plot, tooltip = tooltip_vars)
|
|
||||||
} else {
|
|
||||||
plotly::ggplotly(plot, tooltip = "none")
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
# Adjust legend position if specified
|
|
||||||
if (!is.null(config$legend_position) && config$legend_position == "bottom") {
|
if (!is.null(config$legend_position) && config$legend_position == "bottom") {
|
||||||
plotly_plot <- plotly_plot %>% layout(legend = list(orientation = "h"))
|
plotly_plot <- plotly_plot %>% layout(legend = list(orientation = "h"))
|
||||||
}
|
}
|
||||||
|
|
||||||
# Add plots to lists
|
# Add static and interactive plots to lists
|
||||||
static_plots[[i]] <- plot
|
static_plots[[i]] <- plot
|
||||||
plotly_plots[[i]] <- plotly_plot
|
plotly_plots[[i]] <- plotly_plot
|
||||||
}
|
}
|
||||||
|
|
||||||
# Save static PDF plot(s) for the current grid
|
# Save static PDF plots
|
||||||
pdf(file.path(out_dir, paste0(filename, ".pdf")), width = 16, height = 9)
|
pdf(file.path(out_dir, paste0(filename, ".pdf")), width = 16, height = 9)
|
||||||
grid.arrange(grobs = static_plots, ncol = grid_ncol, nrow = grid_nrow)
|
grid.arrange(grobs = static_plots, ncol = grid_ncol, nrow = grid_nrow)
|
||||||
dev.off()
|
dev.off()
|
||||||
|
|
||||||
combined_plot <- plotly::subplot(
|
# Save combined interactive HTML plots
|
||||||
plotly_plots,
|
combined_plot <- plotly::subplot(plotly_plots, nrows = grid_nrow, ncols = grid_ncol, margin = 0.05)
|
||||||
nrows = grid_nrow,
|
|
||||||
ncols = grid_ncol,
|
|
||||||
margin = 0.05
|
|
||||||
)
|
|
||||||
|
|
||||||
# Save combined HTML plot(s)
|
|
||||||
html_file <- file.path(out_dir, paste0(filename, ".html"))
|
html_file <- file.path(out_dir, paste0(filename, ".html"))
|
||||||
saveWidget(combined_plot, file = html_file, selfcontained = TRUE)
|
saveWidget(combined_plot, file = html_file, selfcontained = TRUE)
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
generate_scatter_plot <- function(plot, config) {
|
generate_scatter_plot <- function(plot, config) {
|
||||||
@@ -671,51 +610,39 @@ generate_box_plot <- function(plot, config) {
|
|||||||
return(plot)
|
return(plot)
|
||||||
}
|
}
|
||||||
|
|
||||||
generate_plate_analysis_plot_configs <- function(variables, stages = c("before", "after"),
|
generate_plate_analysis_plot_configs <- function(variables, df_before = NULL, df_after = NULL,
|
||||||
df_before = NULL, df_after = NULL, plot_type = "scatter") {
|
plot_type = "scatter", stages = c("before", "after")) {
|
||||||
plots <- list()
|
plots <- list()
|
||||||
|
|
||||||
for (var in variables) {
|
for (var in variables) {
|
||||||
for (stage in stages) {
|
for (stage in stages) {
|
||||||
df_plot <- if (stage == "before") df_before else df_after
|
df_plot <- if (stage == "before") df_before else df_after
|
||||||
|
|
||||||
# Check for non-finite values in the y-variable
|
# Check for non-finite values in the y-variable
|
||||||
df_plot_filtered <- df_plot %>%
|
df_plot_filtered <- df_plot %>% filter(is.finite(!!sym(var)))
|
||||||
filter(is.finite(!!sym(var)))
|
|
||||||
|
|
||||||
# Count removed rows
|
|
||||||
removed_rows <- nrow(df_plot) - nrow(df_plot_filtered)
|
|
||||||
if (removed_rows > 0) {
|
|
||||||
message(sprintf("Removed %d non-finite values for variable %s during stage %s", removed_rows, var, stage))
|
|
||||||
}
|
|
||||||
|
|
||||||
# Adjust settings based on plot_type
|
# Adjust settings based on plot_type
|
||||||
if (plot_type == "scatter") {
|
|
||||||
error_bar <- TRUE
|
|
||||||
position <- "jitter"
|
|
||||||
} else if (plot_type == "box") {
|
|
||||||
error_bar <- FALSE
|
|
||||||
position <- NULL
|
|
||||||
}
|
|
||||||
|
|
||||||
config <- list(
|
config <- list(
|
||||||
df = df_plot,
|
df = df_plot_filtered,
|
||||||
x_var = "scan",
|
x_var = "scan",
|
||||||
y_var = var,
|
y_var = var,
|
||||||
plot_type = plot_type,
|
plot_type = plot_type,
|
||||||
title = paste("Plate analysis by Drug Conc for", var, stage, "quality control"),
|
title = paste("Plate analysis by Drug Conc for", var, stage, "quality control"),
|
||||||
error_bar = error_bar,
|
color_var = "conc_num_factor_factor",
|
||||||
color_var = "conc_num_factor_zeroed",
|
position = if (plot_type == "scatter") "jitter" else NULL,
|
||||||
position = position,
|
size = 0.2,
|
||||||
size = 0.2
|
error_bar = (plot_type == "scatter")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Add config to plots list
|
||||||
plots <- append(plots, list(config))
|
plots <- append(plots, list(config))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return(plots)
|
|
||||||
|
return(list(grid_layout = list(ncol = 1, nrow = length(plots)), plots = plots))
|
||||||
}
|
}
|
||||||
|
|
||||||
generate_interaction_plot_configs <- function(df, limits_map = NULL, plot_type = "reference") {
|
generate_interaction_plot_configs <- function(df, limits_map = NULL, plot_type = "reference") {
|
||||||
# Define limits if not provided
|
|
||||||
if (is.null(limits_map)) {
|
if (is.null(limits_map)) {
|
||||||
limits_map <- list(
|
limits_map <- list(
|
||||||
L = c(0, 130),
|
L = c(0, 130),
|
||||||
@@ -729,94 +656,74 @@ generate_interaction_plot_configs <- function(df, limits_map = NULL, plot_type =
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
# Define grouping variables and filter data based on plot type
|
group_vars <- if (plot_type == "reference") c("OrfRep", "Gene", "num") else c("OrfRep", "Gene")
|
||||||
if (plot_type == "reference") {
|
|
||||||
group_vars <- c("OrfRep", "Gene", "num")
|
|
||||||
df_filtered <- df %>%
|
df_filtered <- df %>%
|
||||||
mutate(
|
mutate(OrfRepCombined = if (plot_type == "reference") paste(OrfRep, Gene, num, sep = "_") else paste(OrfRep, Gene, sep = "_"))
|
||||||
OrfRepCombined = paste(OrfRep, Gene, num, sep = "_")
|
|
||||||
)
|
|
||||||
} else if (plot_type == "deletion") {
|
|
||||||
group_vars <- c("OrfRep", "Gene")
|
|
||||||
df_filtered <- df %>%
|
|
||||||
mutate(
|
|
||||||
OrfRepCombined = paste(OrfRep, Gene, sep = "_") # Compare by OrfRep and Gene for deletion
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Create a list to store all configs
|
plots <- list()
|
||||||
configs <- list()
|
|
||||||
|
|
||||||
# Generate the first 8 scatter/box plots for L, K, r, AUC (shared between reference and deletion)
|
# Generate plots for overall variables (L, K, r, AUC)
|
||||||
overall_vars <- c("L", "K", "r", "AUC")
|
for (var in c("L", "K", "r", "AUC")) {
|
||||||
for (var in overall_vars) {
|
|
||||||
y_limits <- limits_map[[var]]
|
y_limits <- limits_map[[var]]
|
||||||
|
|
||||||
config <- list(
|
config <- list(
|
||||||
df = df_filtered,
|
df = df_filtered,
|
||||||
plot_type = "scatter",
|
plot_type = "scatter",
|
||||||
x_var = "conc_num_factor_new",
|
x_var = "conc_num_factor_factor",
|
||||||
y_var = var,
|
y_var = var,
|
||||||
x_label = unique(df_filtered$Drug)[1],
|
x_label = unique(df_filtered$Drug)[1],
|
||||||
title = sprintf("Scatter RF for %s with SD", var),
|
title = sprintf("Scatter RF for %s with SD", var),
|
||||||
coord_cartesian = y_limits,
|
coord_cartesian = y_limits,
|
||||||
error_bar = TRUE,
|
error_bar = TRUE,
|
||||||
x_breaks = unique(df_filtered$conc_num_factor_new),
|
x_breaks = unique(df_filtered$conc_num_factor_factor),
|
||||||
x_labels = as.character(unique(df_filtered$conc_num)),
|
x_labels = as.character(unique(df_filtered$conc_num)),
|
||||||
grid_layout = list(ncol = 2, nrow = 2),
|
grid_layout = list(ncol = 2, nrow = 2),
|
||||||
position = "jitter"
|
position = "jitter",
|
||||||
|
smooth = TRUE
|
||||||
)
|
)
|
||||||
configs <- append(configs, list(config))
|
plots <- append(plots, list(config))
|
||||||
}
|
}
|
||||||
|
|
||||||
# Generate Delta comparison plots (4x3 grid for deletion and reference)
|
# Generate Delta comparison plots
|
||||||
unique_groups <- df_filtered %>% select(all_of(group_vars)) %>% distinct()
|
unique_groups <- df_filtered %>% select(all_of(group_vars)) %>% distinct()
|
||||||
|
|
||||||
for (i in seq_len(nrow(unique_groups))) {
|
for (i in seq_len(nrow(unique_groups))) {
|
||||||
group <- unique_groups[i, ]
|
group <- unique_groups[i, ]
|
||||||
group_data <- df_filtered %>% filter(across(all_of(group_vars), ~ . == group[[cur_column()]]))
|
group_data <- df_filtered %>% semi_join(group, by = group_vars)
|
||||||
|
|
||||||
OrfRep <- as.character(group$OrfRep)
|
OrfRep <- as.character(group$OrfRep)
|
||||||
Gene <- if ("Gene" %in% names(group)) as.character(group$Gene) else ""
|
Gene <- if ("Gene" %in% names(group)) as.character(group$Gene) else ""
|
||||||
num <- if ("num" %in% names(group)) as.character(group$num) else ""
|
num <- if ("num" %in% names(group)) as.character(group$num) else ""
|
||||||
OrfRepCombined <- paste(OrfRep, Gene, num, sep = "_")
|
|
||||||
|
|
||||||
# Generate plots for Delta variables
|
for (var in c("Delta_L", "Delta_K", "Delta_r", "Delta_AUC")) {
|
||||||
delta_vars <- c("Delta_L", "Delta_K", "Delta_r", "Delta_AUC")
|
|
||||||
for (var in delta_vars) {
|
|
||||||
y_limits <- limits_map[[var]]
|
y_limits <- limits_map[[var]]
|
||||||
upper_y <- y_limits[2]
|
y_span <- y_limits[2] - y_limits[1]
|
||||||
lower_y <- y_limits[1]
|
|
||||||
y_span <- upper_y - lower_y
|
|
||||||
|
|
||||||
# Get WT_sd_var for error bar calculations
|
# Error bars
|
||||||
WT_sd_var <- paste0("WT_sd_", sub("Delta_", "", var))
|
WT_sd_var <- paste0("WT_sd_", sub("Delta_", "", var))
|
||||||
WT_sd_value <- group_data[[WT_sd_var]][1]
|
WT_sd_value <- group_data[[WT_sd_var]][1]
|
||||||
error_bar_ymin <- 0 - (2 * WT_sd_value)
|
error_bar_ymin <- 0 - (2 * WT_sd_value)
|
||||||
error_bar_ymax <- 0 + (2 * WT_sd_value)
|
error_bar_ymax <- 0 + (2 * WT_sd_value)
|
||||||
|
|
||||||
# Set annotations (Z_Shifts, lm Z-Scores, NG, DB, SM values)
|
# Annotations
|
||||||
Z_Shift_var <- paste0("Z_Shift_", sub("Delta_", "", var))
|
Z_Shift_value <- round(group_data[[paste0("Z_Shift_", sub("Delta_", "", var))]][1], 2)
|
||||||
Z_lm_var <- paste0("Z_lm_", sub("Delta_", "", var))
|
Z_lm_value <- round(group_data[[paste0("Z_lm_", sub("Delta_", "", var))]][1], 2)
|
||||||
Z_Shift_value <- round(group_data[[Z_Shift_var]][1], 2)
|
|
||||||
Z_lm_value <- round(group_data[[Z_lm_var]][1], 2)
|
|
||||||
NG_value <- group_data$NG[1]
|
NG_value <- group_data$NG[1]
|
||||||
DB_value <- group_data$DB[1]
|
DB_value <- group_data$DB[1]
|
||||||
SM_value <- group_data$SM[1]
|
SM_value <- group_data$SM[1]
|
||||||
|
|
||||||
annotations <- list(
|
annotations <- list(
|
||||||
list(x = 1, y = upper_y - 0.2 * y_span, label = paste("ZShift =", Z_Shift_value)),
|
list(x = 1, y = y_limits[2] - 0.2 * y_span, label = paste("ZShift =", Z_Shift_value)),
|
||||||
list(x = 1, y = upper_y - 0.3 * y_span, label = paste("lm ZScore =", Z_lm_value)),
|
list(x = 1, y = y_limits[2] - 0.3 * y_span, label = paste("lm ZScore =", Z_lm_value)),
|
||||||
list(x = 1, y = lower_y + 0.2 * y_span, label = paste("NG =", NG_value)),
|
list(x = 1, y = y_limits[1] + 0.2 * y_span, label = paste("NG =", NG_value)),
|
||||||
list(x = 1, y = lower_y + 0.1 * y_span, label = paste("DB =", DB_value)),
|
list(x = 1, y = y_limits[1] + 0.1 * y_span, label = paste("DB =", DB_value)),
|
||||||
list(x = 1, y = lower_y, label = paste("SM =", SM_value))
|
list(x = 1, y = y_limits[1], label = paste("SM =", SM_value))
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create configuration for each Delta plot
|
|
||||||
config <- list(
|
config <- list(
|
||||||
df = group_data,
|
df = group_data,
|
||||||
plot_type = "scatter",
|
plot_type = "scatter",
|
||||||
x_var = "conc_num",
|
x_var = "conc_num_factor_factor",
|
||||||
y_var = var,
|
y_var = var,
|
||||||
x_label = unique(group_data$Drug)[1],
|
x_label = unique(group_data$Drug)[1],
|
||||||
title = paste(OrfRep, Gene, sep = " "),
|
title = paste(OrfRep, Gene, sep = " "),
|
||||||
@@ -827,23 +734,21 @@ generate_interaction_plot_configs <- function(df, limits_map = NULL, plot_type =
|
|||||||
ymin = error_bar_ymin,
|
ymin = error_bar_ymin,
|
||||||
ymax = error_bar_ymax
|
ymax = error_bar_ymax
|
||||||
),
|
),
|
||||||
lm_smooth = TRUE,
|
smooth = TRUE,
|
||||||
x_breaks = unique(group_data$conc_num_factor_new),
|
x_breaks = unique(group_data$conc_num_factor_factor),
|
||||||
x_labels = as.character(unique(group_data$conc_num)),
|
x_labels = as.character(unique(group_data$conc_num)),
|
||||||
ylim_vals = y_limits,
|
ylim_vals = y_limits,
|
||||||
grid_layout = list(ncol = 4, nrow = 3) # Adjust grid layout for gene-gene comparisons
|
grid_layout = list(ncol = 4, nrow = 3)
|
||||||
)
|
)
|
||||||
configs <- append(configs, list(config))
|
plots <- append(plots, list(config))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return(configs)
|
return(list(grid_layout = list(ncol = 2, nrow = 2), plots = plots))
|
||||||
}
|
}
|
||||||
|
|
||||||
generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FALSE, overlap_color = FALSE) {
|
generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FALSE, overlap_color = FALSE) {
|
||||||
|
|
||||||
sd_bands <- c(1, 2, 3)
|
sd_bands <- c(1, 2, 3)
|
||||||
|
|
||||||
avg_zscore_cols <- paste0("Avg_Zscore_", variables)
|
avg_zscore_cols <- paste0("Avg_Zscore_", variables)
|
||||||
z_lm_cols <- paste0("Z_lm_", variables)
|
z_lm_cols <- paste0("Z_lm_", variables)
|
||||||
rank_avg_zscore_cols <- paste0("Rank_", variables)
|
rank_avg_zscore_cols <- paste0("Rank_", variables)
|
||||||
@@ -851,8 +756,8 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA
|
|||||||
|
|
||||||
configs <- list()
|
configs <- list()
|
||||||
|
|
||||||
|
# Adjust values if necessary
|
||||||
if (adjust) {
|
if (adjust) {
|
||||||
message("Replacing NA with 0.001 for Avg_Zscore_ and Z_lm_ columns for ranks")
|
|
||||||
df <- df %>%
|
df <- df %>%
|
||||||
mutate(
|
mutate(
|
||||||
across(all_of(avg_zscore_cols), ~ifelse(is.na(.), 0.001, .)),
|
across(all_of(avg_zscore_cols), ~ifelse(is.na(.), 0.001, .)),
|
||||||
@@ -860,34 +765,22 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
message("Calculating ranks for Avg_Zscore and Z_lm columns")
|
# Calculate rank columns for Avg_Zscore and Z_lm columns
|
||||||
rank_col_mapping <- setNames(rank_avg_zscore_cols, avg_zscore_cols)
|
|
||||||
df_ranked <- df %>%
|
df_ranked <- df %>%
|
||||||
mutate(across(all_of(avg_zscore_cols), ~rank(., na.last = "keep"), .names = "{rank_col_mapping[.col]}"))
|
mutate(across(all_of(avg_zscore_cols), ~rank(., na.last = "keep"), .names = paste0("Rank_", avg_zscore_cols))) %>%
|
||||||
|
mutate(across(all_of(z_lm_cols), ~rank(., na.last = "keep"), .names = paste0("Rank_lm_", z_lm_cols)))
|
||||||
|
|
||||||
rank_lm_col_mapping <- setNames(rank_z_lm_cols, z_lm_cols)
|
# Generate plots for SD-based L and K variables
|
||||||
df_ranked <- df_ranked %>%
|
|
||||||
mutate(across(all_of(z_lm_cols), ~rank(., na.last = "keep"), .names = "{rank_lm_col_mapping[.col]}"))
|
|
||||||
|
|
||||||
# SD-based plots for L and K
|
|
||||||
for (variable in c("L", "K")) {
|
for (variable in c("L", "K")) {
|
||||||
|
rank_var <- if (is_lm) paste0("Rank_lm_", variable) else paste0("Rank_", variable)
|
||||||
if (is_lm) {
|
zscore_var <- if (is_lm) paste0("Z_lm_", variable) else paste0("Avg_Zscore_", variable)
|
||||||
rank_var <- paste0("Rank_lm_", variable)
|
y_label <- if (is_lm) paste("Int Z score", variable) else paste("Avg Z score", variable)
|
||||||
zscore_var <- paste0("Z_lm_", variable)
|
|
||||||
y_label <- paste("Int Z score", variable)
|
|
||||||
} else {
|
|
||||||
rank_var <- paste0("Rank_", variable)
|
|
||||||
zscore_var <- paste0("Avg_Zscore_", variable)
|
|
||||||
y_label <- paste("Avg Z score", variable)
|
|
||||||
}
|
|
||||||
|
|
||||||
for (sd_band in sd_bands) {
|
for (sd_band in sd_bands) {
|
||||||
|
|
||||||
num_enhancers <- sum(df_ranked[[zscore_var]] >= sd_band, na.rm = TRUE)
|
num_enhancers <- sum(df_ranked[[zscore_var]] >= sd_band, na.rm = TRUE)
|
||||||
num_suppressors <- sum(df_ranked[[zscore_var]] <= -sd_band, na.rm = TRUE)
|
num_suppressors <- sum(df_ranked[[zscore_var]] <= -sd_band, na.rm = TRUE)
|
||||||
|
|
||||||
# Annotated plot configuration
|
# Plot with annotations
|
||||||
configs[[length(configs) + 1]] <- list(
|
configs[[length(configs) + 1]] <- list(
|
||||||
df = df_ranked,
|
df = df_ranked,
|
||||||
x_var = rank_var,
|
x_var = rank_var,
|
||||||
@@ -903,27 +796,22 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA
|
|||||||
list(
|
list(
|
||||||
x = median(df_ranked[[rank_var]], na.rm = TRUE),
|
x = median(df_ranked[[rank_var]], na.rm = TRUE),
|
||||||
y = max(df_ranked[[zscore_var]], na.rm = TRUE) * 0.9,
|
y = max(df_ranked[[zscore_var]], na.rm = TRUE) * 0.9,
|
||||||
label = paste("Deletion Enhancers =", num_enhancers),
|
label = paste("Deletion Enhancers =", num_enhancers)
|
||||||
hjust = 0.5,
|
|
||||||
vjust = 1
|
|
||||||
),
|
),
|
||||||
list(
|
list(
|
||||||
x = median(df_ranked[[rank_var]], na.rm = TRUE),
|
x = median(df_ranked[[rank_var]], na.rm = TRUE),
|
||||||
y = min(df_ranked[[zscore_var]], na.rm = TRUE) * 0.9,
|
y = min(df_ranked[[zscore_var]], na.rm = TRUE) * 0.9,
|
||||||
label = paste("Deletion Suppressors =", num_suppressors),
|
label = paste("Deletion Suppressors =", num_suppressors)
|
||||||
hjust = 0.5,
|
|
||||||
vjust = 0
|
|
||||||
)
|
)
|
||||||
),
|
),
|
||||||
shape = 3,
|
shape = 3,
|
||||||
size = 0.1,
|
size = 0.1,
|
||||||
y_label = y_label,
|
y_label = y_label,
|
||||||
x_label = "Rank",
|
x_label = "Rank",
|
||||||
legend_position = "none",
|
legend_position = "none"
|
||||||
grid_layout = list(ncol = 3, nrow = 2)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Non-Annotated Plot Configuration
|
# Plot without annotations
|
||||||
configs[[length(configs) + 1]] <- list(
|
configs[[length(configs) + 1]] <- list(
|
||||||
df = df_ranked,
|
df = df_ranked,
|
||||||
x_var = rank_var,
|
x_var = rank_var,
|
||||||
@@ -940,42 +828,26 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA
|
|||||||
size = 0.1,
|
size = 0.1,
|
||||||
y_label = y_label,
|
y_label = y_label,
|
||||||
x_label = "Rank",
|
x_label = "Rank",
|
||||||
legend_position = "none",
|
legend_position = "none"
|
||||||
grid_layout = list(ncol = 3, nrow = 2)
|
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# Avg ZScore and Rank Avg ZScore Plots for variables
|
# Generate Avg ZScore and Rank Avg ZScore plots for each variable
|
||||||
for (variable in variables) {
|
for (variable in variables) {
|
||||||
for (plot_type in c("Avg Zscore vs lm", "Rank Avg Zscore vs lm")) {
|
for (plot_type in c("Avg Zscore vs lm", "Rank Avg Zscore vs lm")) {
|
||||||
|
|
||||||
title <- paste(plot_type, variable)
|
title <- paste(plot_type, variable)
|
||||||
|
|
||||||
# Define specific variables based on plot type
|
# Define specific variables based on plot type
|
||||||
if (plot_type == "Avg Zscore vs lm") {
|
x_var <- if (plot_type == "Avg Zscore vs lm") paste0("Avg_Zscore_", variable) else paste0("Rank_", variable)
|
||||||
x_var <- paste0("Avg_Zscore_", variable)
|
y_var <- if (plot_type == "Avg Zscore vs lm") paste0("Z_lm_", variable) else paste0("Rank_lm_", variable)
|
||||||
y_var <- paste0("Z_lm_", variable)
|
|
||||||
rectangles <- list(
|
|
||||||
list(xmin = -2, xmax = 2, ymin = -2, ymax = 2,
|
|
||||||
fill = NA, color = "grey20", alpha = 0.1
|
|
||||||
)
|
|
||||||
)
|
|
||||||
} else if (plot_type == "Rank Avg Zscore vs lm") {
|
|
||||||
x_var <- paste0("Rank_", variable)
|
|
||||||
y_var <- paste0("Rank_lm_", variable)
|
|
||||||
rectangles <- NULL
|
|
||||||
}
|
|
||||||
|
|
||||||
# Fit the linear model
|
# Fit the linear model
|
||||||
lm_model <- lm(as.formula(paste(y_var, "~", x_var)), data = df_ranked)
|
lm_model <- lm(as.formula(paste(y_var, "~", x_var)), data = df_ranked)
|
||||||
|
|
||||||
# Extract intercept, slope, and R-squared from the model coefficients
|
|
||||||
intercept <- coef(lm_model)[1]
|
intercept <- coef(lm_model)[1]
|
||||||
slope <- coef(lm_model)[2]
|
slope <- coef(lm_model)[2]
|
||||||
r_squared <- summary(lm_model)$r.squared
|
r_squared <- summary(lm_model)$r.squared
|
||||||
|
|
||||||
# Annotations: include R-squared in the plot
|
|
||||||
annotations <- list(
|
annotations <- list(
|
||||||
list(
|
list(
|
||||||
x = mean(range(df_ranked[[x_var]], na.rm = TRUE)),
|
x = mean(range(df_ranked[[x_var]], na.rm = TRUE)),
|
||||||
@@ -987,6 +859,12 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
rectangles <- if (plot_type == "Avg Zscore vs lm") {
|
||||||
|
list(list(xmin = -2, xmax = 2, ymin = -2, ymax = 2, fill = NA, color = "grey20", alpha = 0.1))
|
||||||
|
} else {
|
||||||
|
NULL
|
||||||
|
}
|
||||||
|
|
||||||
configs[[length(configs) + 1]] <- list(
|
configs[[length(configs) + 1]] <- list(
|
||||||
df = df_ranked,
|
df = df_ranked,
|
||||||
x_var = x_var,
|
x_var = x_var,
|
||||||
@@ -1007,11 +885,11 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return(configs)
|
|
||||||
|
return(list(grid_layout = list(ncol = 3, nrow = 2), plots = configs))
|
||||||
}
|
}
|
||||||
|
|
||||||
generate_correlation_plot_configs <- function(df, highlight_cyan = FALSE) {
|
generate_correlation_plot_configs <- function(df, highlight_cyan = FALSE) {
|
||||||
# Define relationships for plotting
|
|
||||||
relationships <- list(
|
relationships <- list(
|
||||||
list(x = "Z_lm_L", y = "Z_lm_K", label = "Interaction L vs. Interaction K"),
|
list(x = "Z_lm_L", y = "Z_lm_K", label = "Interaction L vs. Interaction K"),
|
||||||
list(x = "Z_lm_L", y = "Z_lm_r", label = "Interaction L vs. Interaction r"),
|
list(x = "Z_lm_L", y = "Z_lm_r", label = "Interaction L vs. Interaction r"),
|
||||||
@@ -1021,56 +899,36 @@ generate_correlation_plot_configs <- function(df, highlight_cyan = FALSE) {
|
|||||||
list(x = "Z_lm_r", y = "Z_lm_AUC", label = "Interaction r vs. Interaction AUC")
|
list(x = "Z_lm_r", y = "Z_lm_AUC", label = "Interaction r vs. Interaction AUC")
|
||||||
)
|
)
|
||||||
|
|
||||||
configs <- list()
|
plots <- list()
|
||||||
|
|
||||||
for (rel in relationships) {
|
for (rel in relationships) {
|
||||||
# Fit linear model
|
|
||||||
lm_model <- lm(as.formula(paste(rel$y, "~", rel$x)), data = df)
|
lm_model <- lm(as.formula(paste(rel$y, "~", rel$x)), data = df)
|
||||||
lm_summary <- summary(lm_model)
|
r_squared <- summary(lm_model)$r.squared
|
||||||
intercept <- coef(lm_model)[1]
|
|
||||||
slope <- coef(lm_model)[2]
|
|
||||||
r_squared <- lm_summary$r.squared
|
|
||||||
|
|
||||||
# Construct plot configuration
|
|
||||||
config <- list(
|
config <- list(
|
||||||
df = df,
|
df = df,
|
||||||
x_var = rel$x,
|
x_var = rel$x,
|
||||||
y_var = rel$y,
|
y_var = rel$y,
|
||||||
plot_type = "scatter",
|
plot_type = "scatter",
|
||||||
title = rel$label,
|
title = rel$label,
|
||||||
x_label = paste("z-score", gsub("Z_lm_", "", rel$x)),
|
|
||||||
y_label = paste("z-score", gsub("Z_lm_", "", rel$y)),
|
|
||||||
annotations = list(
|
annotations = list(
|
||||||
list(
|
list(x = mean(df[[rel$x]], na.rm = TRUE),
|
||||||
x = mean(range(df[[rel$x]], na.rm = TRUE)),
|
y = mean(df[[rel$y]], na.rm = TRUE),
|
||||||
y = mean(range(df[[rel$y]], na.rm = TRUE)),
|
label = paste("R-squared =", round(r_squared, 3)))
|
||||||
label = paste("R-squared =", round(r_squared, 3)),
|
|
||||||
hjust = 0.5,
|
|
||||||
vjust = 1,
|
|
||||||
size = 5,
|
|
||||||
color = "black"
|
|
||||||
)
|
|
||||||
),
|
),
|
||||||
smooth = TRUE,
|
smooth = TRUE,
|
||||||
smooth_color = "tomato3",
|
smooth_color = "tomato3",
|
||||||
lm_line = list(intercept = intercept, slope = slope),
|
lm_line = list(intercept = coef(lm_model)[1], slope = coef(lm_model)[2]),
|
||||||
legend_position = "right",
|
|
||||||
shape = 3,
|
shape = 3,
|
||||||
size = 0.5,
|
size = 0.5,
|
||||||
color_var = "Overlap",
|
color_var = "Overlap",
|
||||||
rectangles = list(
|
cyan_points = highlight_cyan
|
||||||
list(
|
|
||||||
xmin = -2, xmax = 2, ymin = -2, ymax = 2,
|
|
||||||
fill = NA, color = "grey20", alpha = 0.1
|
|
||||||
)
|
|
||||||
),
|
|
||||||
cyan_points = highlight_cyan, # Toggle cyan point highlighting
|
|
||||||
)
|
)
|
||||||
|
|
||||||
configs[[length(configs) + 1]] <- config
|
plots <- append(plots, list(config))
|
||||||
}
|
}
|
||||||
|
|
||||||
return(configs)
|
return(list(grid_layout = list(ncol = 3, nrow = 2), plots = plots))
|
||||||
}
|
}
|
||||||
|
|
||||||
main <- function() {
|
main <- function() {
|
||||||
@@ -1085,8 +943,6 @@ main <- function() {
|
|||||||
|
|
||||||
summary_vars <- c("L", "K", "r", "AUC", "delta_bg") # fields to filter and calculate summary stats across
|
summary_vars <- c("L", "K", "r", "AUC", "delta_bg") # fields to filter and calculate summary stats across
|
||||||
interaction_vars <- c("L", "K", "r", "AUC") # fields to calculate interaction z-scores
|
interaction_vars <- c("L", "K", "r", "AUC") # fields to calculate interaction z-scores
|
||||||
print_vars <- c("OrfRep", "Plate", "scan", "Col", "Row", "num", "OrfRep", "conc_num", "conc_num_factor",
|
|
||||||
"delta_bg_tolerance", "delta_bg", "Gene", "L", "K", "r", "AUC", "NG", "DB")
|
|
||||||
|
|
||||||
message("Loading and filtering data for experiment: ", exp_name)
|
message("Loading and filtering data for experiment: ", exp_name)
|
||||||
df <- load_and_filter_data(args$easy_results_file, sd = exp_sd) %>%
|
df <- load_and_filter_data(args$easy_results_file, sd = exp_sd) %>%
|
||||||
@@ -1099,7 +955,7 @@ main <- function() {
|
|||||||
df_no_zeros <- df_na %>% filter(L > 0) # formerly X_noZero
|
df_no_zeros <- df_na %>% filter(L > 0) # formerly X_noZero
|
||||||
|
|
||||||
# Save some constants
|
# Save some constants
|
||||||
max_conc <- max(df$conc_num_factor_zeroed_num)
|
max_conc <- max(df$conc_num_factor)
|
||||||
l_half_median <- (median(df_above_tolerance$L, na.rm = TRUE)) / 2
|
l_half_median <- (median(df_above_tolerance$L, na.rm = TRUE)) / 2
|
||||||
k_half_median <- (median(df_above_tolerance$K, na.rm = TRUE)) / 2
|
k_half_median <- (median(df_above_tolerance$K, na.rm = TRUE)) / 2
|
||||||
|
|
||||||
@@ -1107,13 +963,13 @@ main <- function() {
|
|||||||
df_stats <- calculate_summary_stats(
|
df_stats <- calculate_summary_stats(
|
||||||
df = df,
|
df = df,
|
||||||
variables = summary_vars,
|
variables = summary_vars,
|
||||||
group_vars = c("conc_num", "conc_num_factor"))$df_with_stats
|
group_vars = c("conc_num", "conc_num_factor", "conc_num_factor_factor"))$df_with_stats
|
||||||
|
|
||||||
message("Calculating summary statistics after quality control")
|
message("Calculating summary statistics after quality control")
|
||||||
ss <- calculate_summary_stats(
|
ss <- calculate_summary_stats(
|
||||||
df = df_na,
|
df = df_na,
|
||||||
variables = summary_vars,
|
variables = summary_vars,
|
||||||
group_vars = c("conc_num", "conc_num_factor"))
|
group_vars = c("conc_num", "conc_num_factor", "conc_num_factor_factor"))
|
||||||
df_na_ss <- ss$summary_stats
|
df_na_ss <- ss$summary_stats
|
||||||
df_na_stats <- ss$df_with_stats
|
df_na_stats <- ss$df_with_stats
|
||||||
write.csv(df_na_ss, file = file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
|
write.csv(df_na_ss, file = file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
|
||||||
@@ -1150,7 +1006,7 @@ main <- function() {
|
|||||||
df_no_zeros_stats <- calculate_summary_stats(
|
df_no_zeros_stats <- calculate_summary_stats(
|
||||||
df = df_no_zeros,
|
df = df_no_zeros,
|
||||||
variables = summary_vars,
|
variables = summary_vars,
|
||||||
group_vars = c("conc_num", "conc_num_factor")
|
group_vars = c("conc_num", "conc_num_factor", "conc_num_factor_factor")
|
||||||
)$df_with_stats
|
)$df_with_stats
|
||||||
|
|
||||||
message("Filtering by 2SD of K")
|
message("Filtering by 2SD of K")
|
||||||
@@ -1161,13 +1017,13 @@ main <- function() {
|
|||||||
|
|
||||||
message("Calculating summary statistics for L within 2SD of K")
|
message("Calculating summary statistics for L within 2SD of K")
|
||||||
# TODO We're omitting the original z_max calculation, not sure if needed?
|
# TODO We're omitting the original z_max calculation, not sure if needed?
|
||||||
ss <- calculate_summary_stats(df_na_within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))$summary_stats
|
ss <- calculate_summary_stats(df_na_within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor", "conc_num_factor_factor"))$summary_stats
|
||||||
write.csv(ss,
|
write.csv(ss,
|
||||||
file = file.path(out_dir_qc, "max_observed_L_vals_for_spots_within_2sd_K.csv"),
|
file = file.path(out_dir_qc, "max_observed_L_vals_for_spots_within_2sd_K.csv"),
|
||||||
row.names = FALSE)
|
row.names = FALSE)
|
||||||
|
|
||||||
message("Calculating summary statistics for L outside 2SD of K")
|
message("Calculating summary statistics for L outside 2SD of K")
|
||||||
ss <- calculate_summary_stats(df_na_outside_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
|
ss <- calculate_summary_stats(df_na_outside_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor", "conc_num_factor_factor"))
|
||||||
df_na_l_outside_2sd_k_stats <- ss$df_with_stats
|
df_na_l_outside_2sd_k_stats <- ss$df_with_stats
|
||||||
write.csv(ss$summary_stats,
|
write.csv(ss$summary_stats,
|
||||||
file = file.path(out_dir, "max_observed_L_vals_for_spots_outside_2sd_K.csv"),
|
file = file.path(out_dir, "max_observed_L_vals_for_spots_outside_2sd_K.csv"),
|
||||||
@@ -1175,54 +1031,64 @@ main <- function() {
|
|||||||
|
|
||||||
# Each plots list corresponds to a file
|
# Each plots list corresponds to a file
|
||||||
l_vs_k_plot_configs <- list(
|
l_vs_k_plot_configs <- list(
|
||||||
|
grid_layout = list(ncol = 1, nrow = 1),
|
||||||
|
plots = list(
|
||||||
list(
|
list(
|
||||||
df = df,
|
df = df,
|
||||||
x_var = "L",
|
x_var = "L",
|
||||||
y_var = "K",
|
y_var = "K",
|
||||||
plot_type = "scatter",
|
plot_type = "scatter",
|
||||||
delta_bg_point = TRUE,
|
tooltip_vars = c("OrfRep", "Gene", "delta_bg"),
|
||||||
title = "Raw L vs K before quality control",
|
title = "Raw L vs K before quality control",
|
||||||
color_var = "conc_num_factor_zeroed",
|
color_var = "conc_num_factor_factor",
|
||||||
error_bar = FALSE,
|
error_bar = FALSE,
|
||||||
legend_position = "right"
|
legend_position = "right"
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
)
|
||||||
|
|
||||||
frequency_delta_bg_plot_configs <- list(
|
frequency_delta_bg_plot_configs <- list(
|
||||||
|
grid_layout = list(ncol = 1, nrow = 1),
|
||||||
|
plots = list(
|
||||||
list(
|
list(
|
||||||
df = df_stats,
|
df = df_stats,
|
||||||
x_var = "delta_bg",
|
x_var = "delta_bg",
|
||||||
y_var = NULL,
|
y_var = NULL,
|
||||||
plot_type = "density",
|
plot_type = "density",
|
||||||
title = "Density plot for Delta Background by [Drug] (All Data)",
|
title = "Density plot for Delta Background by [Drug] (All Data)",
|
||||||
color_var = "conc_num_factor_zeroed",
|
color_var = "conc_num_factor_factor",
|
||||||
x_label = "Delta Background",
|
x_label = "Delta Background",
|
||||||
y_label = "Density",
|
y_label = "Density",
|
||||||
error_bar = FALSE,
|
error_bar = FALSE,
|
||||||
legend_position = "right"),
|
legend_position = "right"
|
||||||
|
),
|
||||||
list(
|
list(
|
||||||
df = df_stats,
|
df = df_stats,
|
||||||
x_var = "delta_bg",
|
x_var = "delta_bg",
|
||||||
y_var = NULL,
|
y_var = NULL,
|
||||||
plot_type = "bar",
|
plot_type = "bar",
|
||||||
title = "Bar plot for Delta Background by [Drug] (All Data)",
|
title = "Bar plot for Delta Background by [Drug] (All Data)",
|
||||||
color_var = "conc_num_factor_zeroed",
|
color_var = "conc_num_factor_factor",
|
||||||
x_label = "Delta Background",
|
x_label = "Delta Background",
|
||||||
y_label = "Count",
|
y_label = "Count",
|
||||||
error_bar = FALSE,
|
error_bar = FALSE,
|
||||||
legend_position = "right")
|
legend_position = "right"
|
||||||
|
)
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
above_threshold_plot_configs <- list(
|
above_threshold_plot_configs <- list(
|
||||||
|
grid_layout = list(ncol = 1, nrow = 1),
|
||||||
|
plots = list(
|
||||||
list(
|
list(
|
||||||
df = df_above_tolerance,
|
df = df_above_tolerance,
|
||||||
x_var = "L",
|
x_var = "L",
|
||||||
y_var = "K",
|
y_var = "K",
|
||||||
plot_type = "scatter",
|
plot_type = "scatter",
|
||||||
delta_bg_point = TRUE,
|
tooltip_vars = c("OrfRep", "Gene", "delta_bg"),
|
||||||
title = paste("Raw L vs K for strains above Delta Background threshold of",
|
title = paste("Raw L vs K for strains above Delta Background threshold of",
|
||||||
round(df_above_tolerance$delta_bg_tolerance[[1]], 3), "or above"),
|
round(df_above_tolerance$delta_bg_tolerance[[1]], 3), "or above"),
|
||||||
color_var = "conc_num_factor_zeroed",
|
color_var = "conc_num_factor_factor",
|
||||||
position = "jitter",
|
position = "jitter",
|
||||||
annotations = list(
|
annotations = list(
|
||||||
list(
|
list(
|
||||||
@@ -1235,6 +1101,7 @@ main <- function() {
|
|||||||
legend_position = "right"
|
legend_position = "right"
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
)
|
||||||
|
|
||||||
plate_analysis_plot_configs <- generate_plate_analysis_plot_configs(
|
plate_analysis_plot_configs <- generate_plate_analysis_plot_configs(
|
||||||
variables = summary_vars,
|
variables = summary_vars,
|
||||||
@@ -1263,59 +1130,84 @@ main <- function() {
|
|||||||
)
|
)
|
||||||
|
|
||||||
l_outside_2sd_k_plot_configs <- list(
|
l_outside_2sd_k_plot_configs <- list(
|
||||||
|
grid_layout = list(ncol = 1, nrow = 1), # Ensures it's compatible with generate_and_save_plots
|
||||||
|
plots = list(
|
||||||
list(
|
list(
|
||||||
df = df_na_l_outside_2sd_k_stats,
|
df = df_na_l_outside_2sd_k_stats,
|
||||||
x_var = "L",
|
x_var = "L",
|
||||||
y_var = "K",
|
y_var = "K",
|
||||||
plot_type = "scatter",
|
plot_type = "scatter",
|
||||||
delta_bg_point = TRUE,
|
|
||||||
title = "Raw L vs K for strains falling outside 2SD of the K mean at each Conc",
|
title = "Raw L vs K for strains falling outside 2SD of the K mean at each Conc",
|
||||||
color_var = "conc_num_factor_zeroed",
|
color_var = "conc_num_factor_factor",
|
||||||
position = "jitter",
|
position = "jitter", # Apply jitter for better visibility
|
||||||
|
tooltip_vars = c("OrfRep", "Gene", "delta_bg"),
|
||||||
|
annotations = list(
|
||||||
|
list(
|
||||||
|
x = mean(df_na_l_outside_2sd_k_stats$L, na.rm = TRUE),
|
||||||
|
y = mean(df_na_l_outside_2sd_k_stats$K, na.rm = TRUE),
|
||||||
|
label = paste("Total strains:", nrow(df_na_l_outside_2sd_k_stats))
|
||||||
|
)
|
||||||
|
),
|
||||||
|
error_bar = FALSE, # No error bars for this plot
|
||||||
legend_position = "right"
|
legend_position = "right"
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
)
|
||||||
|
|
||||||
delta_bg_outside_2sd_k_plot_configs <- list(
|
delta_bg_outside_2sd_k_plot_configs <- list(
|
||||||
|
grid_layout = list(ncol = 1, nrow = 1), # Ensures it's compatible with generate_and_save_plots
|
||||||
|
plots = list(
|
||||||
list(
|
list(
|
||||||
df = df_na_l_outside_2sd_k_stats,
|
df = df_na_l_outside_2sd_k_stats,
|
||||||
x_var = "delta_bg",
|
x_var = "delta_bg",
|
||||||
y_var = "K",
|
y_var = "K",
|
||||||
plot_type = "scatter",
|
plot_type = "scatter",
|
||||||
gene_point = TRUE,
|
|
||||||
title = "Delta Background vs K for strains falling outside 2SD of the K mean at each Conc",
|
title = "Delta Background vs K for strains falling outside 2SD of the K mean at each Conc",
|
||||||
color_var = "conc_num_factor_zeroed",
|
color_var = "conc_num_factor_factor",
|
||||||
position = "jitter",
|
position = "jitter", # Apply jitter for better visibility
|
||||||
|
tooltip_vars = c("OrfRep", "Gene", "delta_bg"),
|
||||||
|
annotations = list(
|
||||||
|
list(
|
||||||
|
x = mean(df_na_l_outside_2sd_k_stats$delta_bg, na.rm = TRUE),
|
||||||
|
y = mean(df_na_l_outside_2sd_k_stats$K, na.rm = TRUE),
|
||||||
|
label = paste("Total strains:", nrow(df_na_l_outside_2sd_k_stats))
|
||||||
|
)
|
||||||
|
),
|
||||||
|
error_bar = FALSE, # No error bars for this plot
|
||||||
legend_position = "right"
|
legend_position = "right"
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
)
|
||||||
|
|
||||||
message("Generating quality control plots in parallel")
|
message("Generating quality control plots in parallel")
|
||||||
# future::plan(future::multicore, workers = parallel::detectCores())
|
|
||||||
future::plan(future::multisession, workers = 3) # generate 3 plots in parallel
|
|
||||||
|
|
||||||
plot_configs <- list(
|
generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plot_configs)
|
||||||
list(out_dir = out_dir_qc, filename = "L_vs_K_before_quality_control",
|
quit()
|
||||||
plot_configs = l_vs_k_plot_configs),
|
# # future::plan(future::multicore, workers = parallel::detectCores())
|
||||||
list(out_dir = out_dir_qc, filename = "frequency_delta_background",
|
# future::plan(future::multisession, workers = 3) # generate 3 plots in parallel
|
||||||
plot_configs = frequency_delta_bg_plot_configs),
|
|
||||||
list(out_dir = out_dir_qc, filename = "L_vs_K_above_threshold",
|
|
||||||
plot_configs = above_threshold_plot_configs),
|
|
||||||
list(out_dir = out_dir_qc, filename = "plate_analysis",
|
|
||||||
plot_configs = plate_analysis_plot_configs),
|
|
||||||
list(out_dir = out_dir_qc, filename = "plate_analysis_boxplots",
|
|
||||||
plot_configs = plate_analysis_boxplot_configs),
|
|
||||||
list(out_dir = out_dir_qc, filename = "plate_analysis_no_zeros",
|
|
||||||
plot_configs = plate_analysis_no_zeros_plot_configs),
|
|
||||||
list(out_dir = out_dir_qc, filename = "plate_analysis_no_zeros_boxplots",
|
|
||||||
plot_configs = plate_analysis_no_zeros_boxplot_configs),
|
|
||||||
list(out_dir = out_dir_qc, filename = "L_vs_K_for_strains_2SD_outside_mean_K",
|
|
||||||
plot_configs = l_outside_2sd_k_plot_configs),
|
|
||||||
list(out_dir = out_dir_qc, filename = "delta_background_vs_K_for_strains_2sd_outside_mean_K",
|
|
||||||
plot_configs = delta_bg_outside_2sd_k_plot_configs)
|
|
||||||
)
|
|
||||||
|
|
||||||
# Generating quality control plots in parallel
|
# plot_configs <- list(
|
||||||
|
# list(out_dir = out_dir_qc, filename = "L_vs_K_before_quality_control",
|
||||||
|
# plot_configs = l_vs_k_plot_configs),
|
||||||
|
# list(out_dir = out_dir_qc, filename = "frequency_delta_background",
|
||||||
|
# plot_configs = frequency_delta_bg_plot_configs),
|
||||||
|
# list(out_dir = out_dir_qc, filename = "L_vs_K_above_threshold",
|
||||||
|
# plot_configs = above_threshold_plot_configs),
|
||||||
|
# list(out_dir = out_dir_qc, filename = "plate_analysis",
|
||||||
|
# plot_configs = plate_analysis_plot_configs),
|
||||||
|
# list(out_dir = out_dir_qc, filename = "plate_analysis_boxplots",
|
||||||
|
# plot_configs = plate_analysis_boxplot_configs),
|
||||||
|
# list(out_dir = out_dir_qc, filename = "plate_analysis_no_zeros",
|
||||||
|
# plot_configs = plate_analysis_no_zeros_plot_configs),
|
||||||
|
# list(out_dir = out_dir_qc, filename = "plate_analysis_no_zeros_boxplots",
|
||||||
|
# plot_configs = plate_analysis_no_zeros_boxplot_configs),
|
||||||
|
# list(out_dir = out_dir_qc, filename = "L_vs_K_for_strains_2SD_outside_mean_K",
|
||||||
|
# plot_configs = l_outside_2sd_k_plot_configs),
|
||||||
|
# list(out_dir = out_dir_qc, filename = "delta_background_vs_K_for_strains_2sd_outside_mean_K",
|
||||||
|
# plot_configs = delta_bg_outside_2sd_k_plot_configs)
|
||||||
|
# )
|
||||||
|
|
||||||
|
# # Generating quality control plots in parallel
|
||||||
# furrr::future_map(plot_configs, function(config) {
|
# furrr::future_map(plot_configs, function(config) {
|
||||||
# generate_and_save_plots(config$out_dir, config$filename, config$plot_configs)
|
# generate_and_save_plots(config$out_dir, config$filename, config$plot_configs)
|
||||||
# }, .options = furrr_options(seed = TRUE))
|
# }, .options = furrr_options(seed = TRUE))
|
||||||
@@ -1340,7 +1232,7 @@ main <- function() {
|
|||||||
|
|
||||||
# Recalculate summary statistics for the background strain
|
# Recalculate summary statistics for the background strain
|
||||||
message("Calculating summary statistics for background strain")
|
message("Calculating summary statistics for background strain")
|
||||||
ss_bg <- calculate_summary_stats(df_bg, summary_vars, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
|
ss_bg <- calculate_summary_stats(df_bg, summary_vars, group_vars = c("OrfRep", "conc_num", "conc_num_factor", "conc_num_factor_factor"))
|
||||||
summary_stats_bg <- ss_bg$summary_stats
|
summary_stats_bg <- ss_bg$summary_stats
|
||||||
write.csv(summary_stats_bg,
|
write.csv(summary_stats_bg,
|
||||||
file = file.path(out_dir, paste0("summary_stats_background_strain_", strain, ".csv")),
|
file = file.path(out_dir, paste0("summary_stats_background_strain_", strain, ".csv")),
|
||||||
@@ -1351,7 +1243,7 @@ main <- function() {
|
|||||||
df_reference <- df_na_stats %>% # formerly X2_RF
|
df_reference <- df_na_stats %>% # formerly X2_RF
|
||||||
filter(OrfRep == strain) %>%
|
filter(OrfRep == strain) %>%
|
||||||
filter(!is.na(L)) %>%
|
filter(!is.na(L)) %>%
|
||||||
group_by(conc_num, conc_num_factor) %>%
|
group_by(conc_num, conc_num_factor, conc_num_factor_factor) %>%
|
||||||
mutate(
|
mutate(
|
||||||
max_l_theoretical = max(max_L, na.rm = TRUE),
|
max_l_theoretical = max(max_L, na.rm = TRUE),
|
||||||
L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L),
|
L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L),
|
||||||
@@ -1364,7 +1256,7 @@ main <- function() {
|
|||||||
filter(OrfRep != strain) %>%
|
filter(OrfRep != strain) %>%
|
||||||
filter(!is.na(L)) %>%
|
filter(!is.na(L)) %>%
|
||||||
mutate(SM = 0) %>%
|
mutate(SM = 0) %>%
|
||||||
group_by(conc_num, conc_num_factor) %>%
|
group_by(conc_num, conc_num_factor, conc_num_factor_factor) %>%
|
||||||
mutate(
|
mutate(
|
||||||
max_l_theoretical = max(max_L, na.rm = TRUE),
|
max_l_theoretical = max(max_L, na.rm = TRUE),
|
||||||
L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L),
|
L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L),
|
||||||
@@ -1376,7 +1268,7 @@ main <- function() {
|
|||||||
df_reference_stats <- calculate_summary_stats(
|
df_reference_stats <- calculate_summary_stats(
|
||||||
df = df_reference,
|
df = df_reference,
|
||||||
variables = interaction_vars,
|
variables = interaction_vars,
|
||||||
group_vars = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor")
|
group_vars = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor", "conc_num_factor_factor")
|
||||||
)$df_with_stats
|
)$df_with_stats
|
||||||
reference_results <- calculate_interaction_scores(df_reference_stats, max_conc, bg_stats, group_vars = c("OrfRep", "Gene", "num"))
|
reference_results <- calculate_interaction_scores(df_reference_stats, max_conc, bg_stats, group_vars = c("OrfRep", "Gene", "num"))
|
||||||
zscore_calculations_reference <- reference_results$calculations
|
zscore_calculations_reference <- reference_results$calculations
|
||||||
@@ -1387,9 +1279,9 @@ main <- function() {
|
|||||||
df_deletion_stats <- calculate_summary_stats(
|
df_deletion_stats <- calculate_summary_stats(
|
||||||
df = df_deletion,
|
df = df_deletion,
|
||||||
variables = interaction_vars,
|
variables = interaction_vars,
|
||||||
group_vars = c("OrfRep", "Gene", "conc_num", "conc_num_factor")
|
group_vars = c("OrfRep", "Gene", "conc_num", "conc_num_factor", "conc_num_factor_factor")
|
||||||
)$df_with_stats
|
)$df_with_stats
|
||||||
deletion_results <- calculate_interaction_scores(df_deletion_stats, max_conc, bg_stats, group_vars = c("OrfRep"))
|
deletion_results <- calculate_interaction_scores(df_deletion_stats, max_conc, bg_stats, group_vars = c("OrfRep", "Gene"))
|
||||||
zscore_calculations <- deletion_results$calculations
|
zscore_calculations <- deletion_results$calculations
|
||||||
zscore_interactions <- deletion_results$interactions
|
zscore_interactions <- deletion_results$interactions
|
||||||
zscore_interactions_joined <- deletion_results$full_data
|
zscore_interactions_joined <- deletion_results$full_data
|
||||||
|
|||||||
Reference in New Issue
Block a user