Simplify plot config format
This commit is contained in:
@@ -112,7 +112,7 @@ theme_publication <- function(base_size = 14, base_family = "sans", legend_posit
|
||||
legend.spacing = unit(0, "cm"),
|
||||
legend.title = element_text(face = "italic", size = rel(1.3)),
|
||||
legend.text = element_text(size = rel(1.2)),
|
||||
plot.margin = unit(c(10, 5, 5, 5), "mm"),
|
||||
plot.margin = unit(c(10, 5, 5, 5), "mm")
|
||||
# strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
|
||||
# strip.text = element_text(face = "bold")
|
||||
)
|
||||
@@ -150,9 +150,8 @@ load_and_filter_data <- function(easy_results_file, sd = 3) {
|
||||
SM = 0,
|
||||
OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep), # should these be hardcoded?
|
||||
conc_num = as.numeric(gsub("[^0-9\\.]", "", Conc)),
|
||||
conc_num_factor_new = factor(conc_num),
|
||||
conc_num_factor_zeroed = factor(as.numeric(conc_num_factor_new) - 1),
|
||||
conc_num_factor = as.numeric(conc_num_factor_zeroed) # for legacy purposes
|
||||
conc_num_factor = as.numeric(as.factor(conc_num)) - 1, # for legacy purposes
|
||||
conc_num_factor_factor = factor(conc_num)
|
||||
)
|
||||
|
||||
return(df)
|
||||
@@ -186,12 +185,12 @@ calculate_summary_stats <- function(df, variables, group_vars) {
|
||||
N = n(),
|
||||
across(all_of(variables),
|
||||
list(
|
||||
mean = ~mean(., na.rm = TRUE),
|
||||
median = ~median(., na.rm = TRUE),
|
||||
max = ~ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)),
|
||||
min = ~ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)),
|
||||
sd = ~sd(., na.rm = TRUE),
|
||||
se = ~sd(., na.rm = TRUE) / sqrt(N - 1) # Bessel's correction
|
||||
mean = ~ mean(.x, na.rm = TRUE),
|
||||
median = ~ median(.x, na.rm = TRUE),
|
||||
max = ~ ifelse(all(is.na(.x)), NA, max(.x, na.rm = TRUE)),
|
||||
min = ~ ifelse(all(is.na(.x)), NA, min(.x, na.rm = TRUE)),
|
||||
sd = ~ sd(.x, na.rm = TRUE),
|
||||
se = ~ sd(.x, na.rm = TRUE) / sqrt(n() - 1)
|
||||
),
|
||||
.names = "{.fn}_{.col}"
|
||||
),
|
||||
@@ -335,7 +334,8 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats,
|
||||
# Declare column order for output
|
||||
calculations <- calculations %>%
|
||||
select(
|
||||
"OrfRep", "Gene", "num", "conc_num", "conc_num_factor", "N",
|
||||
"OrfRep", "Gene", "num", "N",
|
||||
"conc_num", "conc_num_factor", "conc_num_factor_factor",
|
||||
"mean_L", "mean_K", "mean_r", "mean_AUC",
|
||||
"median_L", "median_K", "median_r", "median_AUC",
|
||||
"sd_L", "sd_K", "sd_r", "sd_AUC",
|
||||
@@ -352,7 +352,8 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats,
|
||||
|
||||
interactions <- interactions %>%
|
||||
select(
|
||||
"OrfRep", "Gene", "conc_num", "conc_num_factor", "num", "NG", "DB", "SM",
|
||||
"OrfRep", "Gene", "num", "NG", "DB", "SM",
|
||||
"conc_num", "conc_num_factor", "conc_num_factor_factor",
|
||||
"Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
|
||||
"Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
|
||||
"Sum_Z_Score_L", "Sum_Z_Score_K", "Sum_Z_Score_r", "Sum_Z_Score_AUC",
|
||||
@@ -366,10 +367,11 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats,
|
||||
cleaned_df <- df %>%
|
||||
select(-any_of(
|
||||
setdiff(intersect(names(df), names(calculations)),
|
||||
c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))))
|
||||
c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor", "conc_num_factor_factor"))))
|
||||
|
||||
# Join the original dataframe with calculations
|
||||
df_with_calculations <- left_join(cleaned_df, calculations, by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))
|
||||
df_with_calculations <- left_join(cleaned_df, calculations,
|
||||
by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor", "conc_num_factor_factor"))
|
||||
|
||||
# Remove overlapping columns between df_with_calculations and interactions
|
||||
df_with_calculations_clean <- df_with_calculations %>%
|
||||
@@ -378,7 +380,8 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats,
|
||||
c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))))
|
||||
|
||||
# Join with interactions to create the full dataset
|
||||
full_data <- left_join(df_with_calculations_clean, interactions, by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))
|
||||
full_data <- left_join(df_with_calculations_clean, interactions,
|
||||
by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor", "conc_num_factor_factor"))
|
||||
|
||||
return(list(
|
||||
calculations = calculations,
|
||||
@@ -390,131 +393,67 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats,
|
||||
generate_and_save_plots <- function(out_dir, filename, plot_configs) {
|
||||
message("Generating ", filename, ".pdf and ", filename, ".html")
|
||||
|
||||
for (config_group in plot_configs) {
|
||||
plot_list <- config_group$plots
|
||||
grid_nrow <- config_group$grid_layout$nrow
|
||||
grid_ncol <- config_group$grid_layout$ncol
|
||||
grid_nrow <- ifelse(is.null(plot_configs$grid_layout$nrow), 1, plot_configs$grid_layout$nrow)
|
||||
grid_ncol <- ifelse(is.null(plot_configs$grid_layout$ncol), length(plot_configs$plots), plot_configs$grid_layout$ncol)
|
||||
|
||||
# Set defaults if nrow or ncol are not provided
|
||||
if (is.null(grid_nrow) || is.null(grid_ncol)) {
|
||||
num_plots <- length(plot_list)
|
||||
grid_nrow <- ifelse(is.null(grid_nrow), 1, grid_nrow)
|
||||
grid_ncol <- ifelse(is.null(grid_ncol), num_plots, grid_ncol)
|
||||
}
|
||||
|
||||
# Prepare lists to collect static and interactive plots
|
||||
static_plots <- list()
|
||||
plotly_plots <- list()
|
||||
|
||||
# Generate each individual plot based on the configuration
|
||||
for (i in seq_along(plot_list)) {
|
||||
config <- plot_list[[i]]
|
||||
for (i in seq_along(plot_configs$plots)) {
|
||||
config <- plot_configs$plots[[i]]
|
||||
df <- config$df
|
||||
|
||||
# Create the base plot
|
||||
# Define aes_mapping and handle color_var defaulting
|
||||
aes_mapping <- if (config$plot_type == "bar") {
|
||||
if (!is.null(config$color_var)) {
|
||||
aes(x = .data[[config$x_var]], fill = as.factor(.data[[config$color_var]]), color = as.factor(.data[[config$color_var]]))
|
||||
} else {
|
||||
aes(x = .data[[config$x_var]])
|
||||
}
|
||||
aes(x = .data[[config$x_var]], fill = ifelse(!is.null(config$color_var), .data[[config$color_var]], "black"),
|
||||
color = ifelse(!is.null(config$color_var), .data[[config$color_var]], "black"))
|
||||
} else if (config$plot_type == "density") {
|
||||
if (!is.null(config$color_var)) {
|
||||
aes(x = .data[[config$x_var]], color = as.factor(.data[[config$color_var]]))
|
||||
aes(x = .data[[config$x_var]], color = ifelse(!is.null(config$color_var), .data[[config$color_var]], "black"))
|
||||
} else {
|
||||
aes(x = .data[[config$x_var]])
|
||||
}
|
||||
} else {
|
||||
if (!is.null(config$color_var)) {
|
||||
aes(x = .data[[config$x_var]], y = .data[[config$y_var]], color = as.factor(.data[[config$color_var]]))
|
||||
} else {
|
||||
aes(x = .data[[config$x_var]], y = .data[[config$y_var]])
|
||||
}
|
||||
aes(x = .data[[config$x_var]], y = .data[[config$y_var]], color = ifelse(!is.null(config$color_var), .data[[config$color_var]], "black"))
|
||||
}
|
||||
|
||||
plot <- ggplot(df, aes_mapping)
|
||||
# Apply theme_publication with legend_position
|
||||
plot <- ggplot(df, aes_mapping) + theme_publication(legend_position = config$legend_position)
|
||||
|
||||
# Apply theme_publication with legend_position from config
|
||||
legend_position <- if (!is.null(config$legend_position)) config$legend_position else "bottom"
|
||||
plot <- plot + theme_publication(legend_position = legend_position)
|
||||
|
||||
# Use appropriate helper function based on plot type
|
||||
# Apply appropriate plot function
|
||||
plot <- switch(config$plot_type,
|
||||
"scatter" = generate_scatter_plot(plot, config),
|
||||
"box" = generate_box_plot(plot, config),
|
||||
"density" = plot + geom_density(),
|
||||
"bar" = plot + geom_bar(),
|
||||
plot # default case if no type matches
|
||||
plot # default case
|
||||
)
|
||||
|
||||
# Add title and labels
|
||||
if (!is.null(config$title)) {
|
||||
plot <- plot + ggtitle(config$title)
|
||||
}
|
||||
if (!is.null(config$x_label)) {
|
||||
plot <- plot + xlab(config$x_label)
|
||||
}
|
||||
if (!is.null(config$y_label)) {
|
||||
plot <- plot + ylab(config$y_label)
|
||||
}
|
||||
|
||||
# Add cartesian coordinates if specified
|
||||
if (!is.null(config$coord_cartesian)) {
|
||||
plot <- plot + coord_cartesian(ylim = config$coord_cartesian)
|
||||
}
|
||||
|
||||
# Apply scale_color_discrete(guide = FALSE) when color_var is NULL
|
||||
if (is.null(config$color_var)) {
|
||||
plot <- plot + scale_color_discrete(guide = "none")
|
||||
}
|
||||
# Add titles and labels
|
||||
if (!is.null(config$title)) plot <- plot + ggtitle(config$title)
|
||||
if (!is.null(config$x_label)) plot <- plot + xlab(config$x_label)
|
||||
if (!is.null(config$y_label)) plot <- plot + ylab(config$y_label)
|
||||
if (!is.null(config$coord_cartesian)) plot <- plot + coord_cartesian(ylim = config$coord_cartesian)
|
||||
|
||||
# Add interactive tooltips for plotly
|
||||
tooltip_vars <- c()
|
||||
if (config$plot_type == "scatter") {
|
||||
if (!is.null(config$delta_bg_point) && config$delta_bg_point) {
|
||||
tooltip_vars <- c(tooltip_vars, "OrfRep", "Gene", "delta_bg")
|
||||
} else if (!is.null(config$gene_point) && config$gene_point) {
|
||||
tooltip_vars <- c(tooltip_vars, "OrfRep", "Gene")
|
||||
} else if (!is.null(config$y_var) && !is.null(config$x_var)) {
|
||||
tooltip_vars <- c(config$x_var, config$y_var)
|
||||
}
|
||||
}
|
||||
tooltip_vars <- ifelse(!is.null(config$tooltip_vars), config$tooltip_vars, "none")
|
||||
plotly_plot <- suppressWarnings(plotly::ggplotly(plot, tooltip = tooltip_vars))
|
||||
|
||||
# Convert to plotly object and suppress warnings here
|
||||
plotly_plot <- suppressWarnings({
|
||||
if (length(tooltip_vars) > 0) {
|
||||
plotly::ggplotly(plot, tooltip = tooltip_vars)
|
||||
} else {
|
||||
plotly::ggplotly(plot, tooltip = "none")
|
||||
}
|
||||
})
|
||||
|
||||
# Adjust legend position if specified
|
||||
# Adjust legend position in plotly
|
||||
if (!is.null(config$legend_position) && config$legend_position == "bottom") {
|
||||
plotly_plot <- plotly_plot %>% layout(legend = list(orientation = "h"))
|
||||
}
|
||||
|
||||
# Add plots to lists
|
||||
# Add static and interactive plots to lists
|
||||
static_plots[[i]] <- plot
|
||||
plotly_plots[[i]] <- plotly_plot
|
||||
}
|
||||
|
||||
# Save static PDF plot(s) for the current grid
|
||||
# Save static PDF plots
|
||||
pdf(file.path(out_dir, paste0(filename, ".pdf")), width = 16, height = 9)
|
||||
grid.arrange(grobs = static_plots, ncol = grid_ncol, nrow = grid_nrow)
|
||||
dev.off()
|
||||
|
||||
combined_plot <- plotly::subplot(
|
||||
plotly_plots,
|
||||
nrows = grid_nrow,
|
||||
ncols = grid_ncol,
|
||||
margin = 0.05
|
||||
)
|
||||
|
||||
# Save combined HTML plot(s)
|
||||
# Save combined interactive HTML plots
|
||||
combined_plot <- plotly::subplot(plotly_plots, nrows = grid_nrow, ncols = grid_ncol, margin = 0.05)
|
||||
html_file <- file.path(out_dir, paste0(filename, ".html"))
|
||||
saveWidget(combined_plot, file = html_file, selfcontained = TRUE)
|
||||
}
|
||||
}
|
||||
|
||||
generate_scatter_plot <- function(plot, config) {
|
||||
@@ -671,51 +610,39 @@ generate_box_plot <- function(plot, config) {
|
||||
return(plot)
|
||||
}
|
||||
|
||||
generate_plate_analysis_plot_configs <- function(variables, stages = c("before", "after"),
|
||||
df_before = NULL, df_after = NULL, plot_type = "scatter") {
|
||||
generate_plate_analysis_plot_configs <- function(variables, df_before = NULL, df_after = NULL,
|
||||
plot_type = "scatter", stages = c("before", "after")) {
|
||||
plots <- list()
|
||||
|
||||
for (var in variables) {
|
||||
for (stage in stages) {
|
||||
df_plot <- if (stage == "before") df_before else df_after
|
||||
|
||||
# Check for non-finite values in the y-variable
|
||||
df_plot_filtered <- df_plot %>%
|
||||
filter(is.finite(!!sym(var)))
|
||||
|
||||
# Count removed rows
|
||||
removed_rows <- nrow(df_plot) - nrow(df_plot_filtered)
|
||||
if (removed_rows > 0) {
|
||||
message(sprintf("Removed %d non-finite values for variable %s during stage %s", removed_rows, var, stage))
|
||||
}
|
||||
df_plot_filtered <- df_plot %>% filter(is.finite(!!sym(var)))
|
||||
|
||||
# Adjust settings based on plot_type
|
||||
if (plot_type == "scatter") {
|
||||
error_bar <- TRUE
|
||||
position <- "jitter"
|
||||
} else if (plot_type == "box") {
|
||||
error_bar <- FALSE
|
||||
position <- NULL
|
||||
}
|
||||
|
||||
config <- list(
|
||||
df = df_plot,
|
||||
df = df_plot_filtered,
|
||||
x_var = "scan",
|
||||
y_var = var,
|
||||
plot_type = plot_type,
|
||||
title = paste("Plate analysis by Drug Conc for", var, stage, "quality control"),
|
||||
error_bar = error_bar,
|
||||
color_var = "conc_num_factor_zeroed",
|
||||
position = position,
|
||||
size = 0.2
|
||||
color_var = "conc_num_factor_factor",
|
||||
position = if (plot_type == "scatter") "jitter" else NULL,
|
||||
size = 0.2,
|
||||
error_bar = (plot_type == "scatter")
|
||||
)
|
||||
|
||||
# Add config to plots list
|
||||
plots <- append(plots, list(config))
|
||||
}
|
||||
}
|
||||
return(plots)
|
||||
|
||||
return(list(grid_layout = list(ncol = 1, nrow = length(plots)), plots = plots))
|
||||
}
|
||||
|
||||
generate_interaction_plot_configs <- function(df, limits_map = NULL, plot_type = "reference") {
|
||||
# Define limits if not provided
|
||||
if (is.null(limits_map)) {
|
||||
limits_map <- list(
|
||||
L = c(0, 130),
|
||||
@@ -729,94 +656,74 @@ generate_interaction_plot_configs <- function(df, limits_map = NULL, plot_type =
|
||||
)
|
||||
}
|
||||
|
||||
# Define grouping variables and filter data based on plot type
|
||||
if (plot_type == "reference") {
|
||||
group_vars <- c("OrfRep", "Gene", "num")
|
||||
group_vars <- if (plot_type == "reference") c("OrfRep", "Gene", "num") else c("OrfRep", "Gene")
|
||||
df_filtered <- df %>%
|
||||
mutate(
|
||||
OrfRepCombined = paste(OrfRep, Gene, num, sep = "_")
|
||||
)
|
||||
} else if (plot_type == "deletion") {
|
||||
group_vars <- c("OrfRep", "Gene")
|
||||
df_filtered <- df %>%
|
||||
mutate(
|
||||
OrfRepCombined = paste(OrfRep, Gene, sep = "_") # Compare by OrfRep and Gene for deletion
|
||||
)
|
||||
}
|
||||
mutate(OrfRepCombined = if (plot_type == "reference") paste(OrfRep, Gene, num, sep = "_") else paste(OrfRep, Gene, sep = "_"))
|
||||
|
||||
# Create a list to store all configs
|
||||
configs <- list()
|
||||
plots <- list()
|
||||
|
||||
# Generate the first 8 scatter/box plots for L, K, r, AUC (shared between reference and deletion)
|
||||
overall_vars <- c("L", "K", "r", "AUC")
|
||||
for (var in overall_vars) {
|
||||
# Generate plots for overall variables (L, K, r, AUC)
|
||||
for (var in c("L", "K", "r", "AUC")) {
|
||||
y_limits <- limits_map[[var]]
|
||||
|
||||
config <- list(
|
||||
df = df_filtered,
|
||||
plot_type = "scatter",
|
||||
x_var = "conc_num_factor_new",
|
||||
x_var = "conc_num_factor_factor",
|
||||
y_var = var,
|
||||
x_label = unique(df_filtered$Drug)[1],
|
||||
title = sprintf("Scatter RF for %s with SD", var),
|
||||
coord_cartesian = y_limits,
|
||||
error_bar = TRUE,
|
||||
x_breaks = unique(df_filtered$conc_num_factor_new),
|
||||
x_breaks = unique(df_filtered$conc_num_factor_factor),
|
||||
x_labels = as.character(unique(df_filtered$conc_num)),
|
||||
grid_layout = list(ncol = 2, nrow = 2),
|
||||
position = "jitter"
|
||||
position = "jitter",
|
||||
smooth = TRUE
|
||||
)
|
||||
configs <- append(configs, list(config))
|
||||
plots <- append(plots, list(config))
|
||||
}
|
||||
|
||||
# Generate Delta comparison plots (4x3 grid for deletion and reference)
|
||||
# Generate Delta comparison plots
|
||||
unique_groups <- df_filtered %>% select(all_of(group_vars)) %>% distinct()
|
||||
|
||||
for (i in seq_len(nrow(unique_groups))) {
|
||||
group <- unique_groups[i, ]
|
||||
group_data <- df_filtered %>% filter(across(all_of(group_vars), ~ . == group[[cur_column()]]))
|
||||
group_data <- df_filtered %>% semi_join(group, by = group_vars)
|
||||
|
||||
OrfRep <- as.character(group$OrfRep)
|
||||
Gene <- if ("Gene" %in% names(group)) as.character(group$Gene) else ""
|
||||
num <- if ("num" %in% names(group)) as.character(group$num) else ""
|
||||
OrfRepCombined <- paste(OrfRep, Gene, num, sep = "_")
|
||||
|
||||
# Generate plots for Delta variables
|
||||
delta_vars <- c("Delta_L", "Delta_K", "Delta_r", "Delta_AUC")
|
||||
for (var in delta_vars) {
|
||||
for (var in c("Delta_L", "Delta_K", "Delta_r", "Delta_AUC")) {
|
||||
y_limits <- limits_map[[var]]
|
||||
upper_y <- y_limits[2]
|
||||
lower_y <- y_limits[1]
|
||||
y_span <- upper_y - lower_y
|
||||
y_span <- y_limits[2] - y_limits[1]
|
||||
|
||||
# Get WT_sd_var for error bar calculations
|
||||
# Error bars
|
||||
WT_sd_var <- paste0("WT_sd_", sub("Delta_", "", var))
|
||||
WT_sd_value <- group_data[[WT_sd_var]][1]
|
||||
error_bar_ymin <- 0 - (2 * WT_sd_value)
|
||||
error_bar_ymax <- 0 + (2 * WT_sd_value)
|
||||
|
||||
# Set annotations (Z_Shifts, lm Z-Scores, NG, DB, SM values)
|
||||
Z_Shift_var <- paste0("Z_Shift_", sub("Delta_", "", var))
|
||||
Z_lm_var <- paste0("Z_lm_", sub("Delta_", "", var))
|
||||
Z_Shift_value <- round(group_data[[Z_Shift_var]][1], 2)
|
||||
Z_lm_value <- round(group_data[[Z_lm_var]][1], 2)
|
||||
# Annotations
|
||||
Z_Shift_value <- round(group_data[[paste0("Z_Shift_", sub("Delta_", "", var))]][1], 2)
|
||||
Z_lm_value <- round(group_data[[paste0("Z_lm_", sub("Delta_", "", var))]][1], 2)
|
||||
NG_value <- group_data$NG[1]
|
||||
DB_value <- group_data$DB[1]
|
||||
SM_value <- group_data$SM[1]
|
||||
|
||||
annotations <- list(
|
||||
list(x = 1, y = upper_y - 0.2 * y_span, label = paste("ZShift =", Z_Shift_value)),
|
||||
list(x = 1, y = upper_y - 0.3 * y_span, label = paste("lm ZScore =", Z_lm_value)),
|
||||
list(x = 1, y = lower_y + 0.2 * y_span, label = paste("NG =", NG_value)),
|
||||
list(x = 1, y = lower_y + 0.1 * y_span, label = paste("DB =", DB_value)),
|
||||
list(x = 1, y = lower_y, label = paste("SM =", SM_value))
|
||||
list(x = 1, y = y_limits[2] - 0.2 * y_span, label = paste("ZShift =", Z_Shift_value)),
|
||||
list(x = 1, y = y_limits[2] - 0.3 * y_span, label = paste("lm ZScore =", Z_lm_value)),
|
||||
list(x = 1, y = y_limits[1] + 0.2 * y_span, label = paste("NG =", NG_value)),
|
||||
list(x = 1, y = y_limits[1] + 0.1 * y_span, label = paste("DB =", DB_value)),
|
||||
list(x = 1, y = y_limits[1], label = paste("SM =", SM_value))
|
||||
)
|
||||
|
||||
# Create configuration for each Delta plot
|
||||
config <- list(
|
||||
df = group_data,
|
||||
plot_type = "scatter",
|
||||
x_var = "conc_num",
|
||||
x_var = "conc_num_factor_factor",
|
||||
y_var = var,
|
||||
x_label = unique(group_data$Drug)[1],
|
||||
title = paste(OrfRep, Gene, sep = " "),
|
||||
@@ -827,23 +734,21 @@ generate_interaction_plot_configs <- function(df, limits_map = NULL, plot_type =
|
||||
ymin = error_bar_ymin,
|
||||
ymax = error_bar_ymax
|
||||
),
|
||||
lm_smooth = TRUE,
|
||||
x_breaks = unique(group_data$conc_num_factor_new),
|
||||
smooth = TRUE,
|
||||
x_breaks = unique(group_data$conc_num_factor_factor),
|
||||
x_labels = as.character(unique(group_data$conc_num)),
|
||||
ylim_vals = y_limits,
|
||||
grid_layout = list(ncol = 4, nrow = 3) # Adjust grid layout for gene-gene comparisons
|
||||
grid_layout = list(ncol = 4, nrow = 3)
|
||||
)
|
||||
configs <- append(configs, list(config))
|
||||
plots <- append(plots, list(config))
|
||||
}
|
||||
}
|
||||
|
||||
return(configs)
|
||||
return(list(grid_layout = list(ncol = 2, nrow = 2), plots = plots))
|
||||
}
|
||||
|
||||
generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FALSE, overlap_color = FALSE) {
|
||||
|
||||
sd_bands <- c(1, 2, 3)
|
||||
|
||||
avg_zscore_cols <- paste0("Avg_Zscore_", variables)
|
||||
z_lm_cols <- paste0("Z_lm_", variables)
|
||||
rank_avg_zscore_cols <- paste0("Rank_", variables)
|
||||
@@ -851,8 +756,8 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA
|
||||
|
||||
configs <- list()
|
||||
|
||||
# Adjust values if necessary
|
||||
if (adjust) {
|
||||
message("Replacing NA with 0.001 for Avg_Zscore_ and Z_lm_ columns for ranks")
|
||||
df <- df %>%
|
||||
mutate(
|
||||
across(all_of(avg_zscore_cols), ~ifelse(is.na(.), 0.001, .)),
|
||||
@@ -860,34 +765,22 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA
|
||||
)
|
||||
}
|
||||
|
||||
message("Calculating ranks for Avg_Zscore and Z_lm columns")
|
||||
rank_col_mapping <- setNames(rank_avg_zscore_cols, avg_zscore_cols)
|
||||
# Calculate rank columns for Avg_Zscore and Z_lm columns
|
||||
df_ranked <- df %>%
|
||||
mutate(across(all_of(avg_zscore_cols), ~rank(., na.last = "keep"), .names = "{rank_col_mapping[.col]}"))
|
||||
mutate(across(all_of(avg_zscore_cols), ~rank(., na.last = "keep"), .names = paste0("Rank_", avg_zscore_cols))) %>%
|
||||
mutate(across(all_of(z_lm_cols), ~rank(., na.last = "keep"), .names = paste0("Rank_lm_", z_lm_cols)))
|
||||
|
||||
rank_lm_col_mapping <- setNames(rank_z_lm_cols, z_lm_cols)
|
||||
df_ranked <- df_ranked %>%
|
||||
mutate(across(all_of(z_lm_cols), ~rank(., na.last = "keep"), .names = "{rank_lm_col_mapping[.col]}"))
|
||||
|
||||
# SD-based plots for L and K
|
||||
# Generate plots for SD-based L and K variables
|
||||
for (variable in c("L", "K")) {
|
||||
|
||||
if (is_lm) {
|
||||
rank_var <- paste0("Rank_lm_", variable)
|
||||
zscore_var <- paste0("Z_lm_", variable)
|
||||
y_label <- paste("Int Z score", variable)
|
||||
} else {
|
||||
rank_var <- paste0("Rank_", variable)
|
||||
zscore_var <- paste0("Avg_Zscore_", variable)
|
||||
y_label <- paste("Avg Z score", variable)
|
||||
}
|
||||
rank_var <- if (is_lm) paste0("Rank_lm_", variable) else paste0("Rank_", variable)
|
||||
zscore_var <- if (is_lm) paste0("Z_lm_", variable) else paste0("Avg_Zscore_", variable)
|
||||
y_label <- if (is_lm) paste("Int Z score", variable) else paste("Avg Z score", variable)
|
||||
|
||||
for (sd_band in sd_bands) {
|
||||
|
||||
num_enhancers <- sum(df_ranked[[zscore_var]] >= sd_band, na.rm = TRUE)
|
||||
num_suppressors <- sum(df_ranked[[zscore_var]] <= -sd_band, na.rm = TRUE)
|
||||
|
||||
# Annotated plot configuration
|
||||
# Plot with annotations
|
||||
configs[[length(configs) + 1]] <- list(
|
||||
df = df_ranked,
|
||||
x_var = rank_var,
|
||||
@@ -903,27 +796,22 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA
|
||||
list(
|
||||
x = median(df_ranked[[rank_var]], na.rm = TRUE),
|
||||
y = max(df_ranked[[zscore_var]], na.rm = TRUE) * 0.9,
|
||||
label = paste("Deletion Enhancers =", num_enhancers),
|
||||
hjust = 0.5,
|
||||
vjust = 1
|
||||
label = paste("Deletion Enhancers =", num_enhancers)
|
||||
),
|
||||
list(
|
||||
x = median(df_ranked[[rank_var]], na.rm = TRUE),
|
||||
y = min(df_ranked[[zscore_var]], na.rm = TRUE) * 0.9,
|
||||
label = paste("Deletion Suppressors =", num_suppressors),
|
||||
hjust = 0.5,
|
||||
vjust = 0
|
||||
label = paste("Deletion Suppressors =", num_suppressors)
|
||||
)
|
||||
),
|
||||
shape = 3,
|
||||
size = 0.1,
|
||||
y_label = y_label,
|
||||
x_label = "Rank",
|
||||
legend_position = "none",
|
||||
grid_layout = list(ncol = 3, nrow = 2)
|
||||
legend_position = "none"
|
||||
)
|
||||
|
||||
# Non-Annotated Plot Configuration
|
||||
# Plot without annotations
|
||||
configs[[length(configs) + 1]] <- list(
|
||||
df = df_ranked,
|
||||
x_var = rank_var,
|
||||
@@ -940,42 +828,26 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA
|
||||
size = 0.1,
|
||||
y_label = y_label,
|
||||
x_label = "Rank",
|
||||
legend_position = "none",
|
||||
grid_layout = list(ncol = 3, nrow = 2)
|
||||
legend_position = "none"
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
# Avg ZScore and Rank Avg ZScore Plots for variables
|
||||
# Generate Avg ZScore and Rank Avg ZScore plots for each variable
|
||||
for (variable in variables) {
|
||||
for (plot_type in c("Avg Zscore vs lm", "Rank Avg Zscore vs lm")) {
|
||||
|
||||
title <- paste(plot_type, variable)
|
||||
|
||||
# Define specific variables based on plot type
|
||||
if (plot_type == "Avg Zscore vs lm") {
|
||||
x_var <- paste0("Avg_Zscore_", variable)
|
||||
y_var <- paste0("Z_lm_", variable)
|
||||
rectangles <- list(
|
||||
list(xmin = -2, xmax = 2, ymin = -2, ymax = 2,
|
||||
fill = NA, color = "grey20", alpha = 0.1
|
||||
)
|
||||
)
|
||||
} else if (plot_type == "Rank Avg Zscore vs lm") {
|
||||
x_var <- paste0("Rank_", variable)
|
||||
y_var <- paste0("Rank_lm_", variable)
|
||||
rectangles <- NULL
|
||||
}
|
||||
x_var <- if (plot_type == "Avg Zscore vs lm") paste0("Avg_Zscore_", variable) else paste0("Rank_", variable)
|
||||
y_var <- if (plot_type == "Avg Zscore vs lm") paste0("Z_lm_", variable) else paste0("Rank_lm_", variable)
|
||||
|
||||
# Fit the linear model
|
||||
lm_model <- lm(as.formula(paste(y_var, "~", x_var)), data = df_ranked)
|
||||
|
||||
# Extract intercept, slope, and R-squared from the model coefficients
|
||||
intercept <- coef(lm_model)[1]
|
||||
slope <- coef(lm_model)[2]
|
||||
r_squared <- summary(lm_model)$r.squared
|
||||
|
||||
# Annotations: include R-squared in the plot
|
||||
annotations <- list(
|
||||
list(
|
||||
x = mean(range(df_ranked[[x_var]], na.rm = TRUE)),
|
||||
@@ -987,6 +859,12 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA
|
||||
)
|
||||
)
|
||||
|
||||
rectangles <- if (plot_type == "Avg Zscore vs lm") {
|
||||
list(list(xmin = -2, xmax = 2, ymin = -2, ymax = 2, fill = NA, color = "grey20", alpha = 0.1))
|
||||
} else {
|
||||
NULL
|
||||
}
|
||||
|
||||
configs[[length(configs) + 1]] <- list(
|
||||
df = df_ranked,
|
||||
x_var = x_var,
|
||||
@@ -1007,11 +885,11 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA
|
||||
)
|
||||
}
|
||||
}
|
||||
return(configs)
|
||||
|
||||
return(list(grid_layout = list(ncol = 3, nrow = 2), plots = configs))
|
||||
}
|
||||
|
||||
generate_correlation_plot_configs <- function(df, highlight_cyan = FALSE) {
|
||||
# Define relationships for plotting
|
||||
relationships <- list(
|
||||
list(x = "Z_lm_L", y = "Z_lm_K", label = "Interaction L vs. Interaction K"),
|
||||
list(x = "Z_lm_L", y = "Z_lm_r", label = "Interaction L vs. Interaction r"),
|
||||
@@ -1021,56 +899,36 @@ generate_correlation_plot_configs <- function(df, highlight_cyan = FALSE) {
|
||||
list(x = "Z_lm_r", y = "Z_lm_AUC", label = "Interaction r vs. Interaction AUC")
|
||||
)
|
||||
|
||||
configs <- list()
|
||||
plots <- list()
|
||||
|
||||
for (rel in relationships) {
|
||||
# Fit linear model
|
||||
lm_model <- lm(as.formula(paste(rel$y, "~", rel$x)), data = df)
|
||||
lm_summary <- summary(lm_model)
|
||||
intercept <- coef(lm_model)[1]
|
||||
slope <- coef(lm_model)[2]
|
||||
r_squared <- lm_summary$r.squared
|
||||
r_squared <- summary(lm_model)$r.squared
|
||||
|
||||
# Construct plot configuration
|
||||
config <- list(
|
||||
df = df,
|
||||
x_var = rel$x,
|
||||
y_var = rel$y,
|
||||
plot_type = "scatter",
|
||||
title = rel$label,
|
||||
x_label = paste("z-score", gsub("Z_lm_", "", rel$x)),
|
||||
y_label = paste("z-score", gsub("Z_lm_", "", rel$y)),
|
||||
annotations = list(
|
||||
list(
|
||||
x = mean(range(df[[rel$x]], na.rm = TRUE)),
|
||||
y = mean(range(df[[rel$y]], na.rm = TRUE)),
|
||||
label = paste("R-squared =", round(r_squared, 3)),
|
||||
hjust = 0.5,
|
||||
vjust = 1,
|
||||
size = 5,
|
||||
color = "black"
|
||||
)
|
||||
list(x = mean(df[[rel$x]], na.rm = TRUE),
|
||||
y = mean(df[[rel$y]], na.rm = TRUE),
|
||||
label = paste("R-squared =", round(r_squared, 3)))
|
||||
),
|
||||
smooth = TRUE,
|
||||
smooth_color = "tomato3",
|
||||
lm_line = list(intercept = intercept, slope = slope),
|
||||
legend_position = "right",
|
||||
lm_line = list(intercept = coef(lm_model)[1], slope = coef(lm_model)[2]),
|
||||
shape = 3,
|
||||
size = 0.5,
|
||||
color_var = "Overlap",
|
||||
rectangles = list(
|
||||
list(
|
||||
xmin = -2, xmax = 2, ymin = -2, ymax = 2,
|
||||
fill = NA, color = "grey20", alpha = 0.1
|
||||
)
|
||||
),
|
||||
cyan_points = highlight_cyan, # Toggle cyan point highlighting
|
||||
cyan_points = highlight_cyan
|
||||
)
|
||||
|
||||
configs[[length(configs) + 1]] <- config
|
||||
plots <- append(plots, list(config))
|
||||
}
|
||||
|
||||
return(configs)
|
||||
return(list(grid_layout = list(ncol = 3, nrow = 2), plots = plots))
|
||||
}
|
||||
|
||||
main <- function() {
|
||||
@@ -1085,8 +943,6 @@ main <- function() {
|
||||
|
||||
summary_vars <- c("L", "K", "r", "AUC", "delta_bg") # fields to filter and calculate summary stats across
|
||||
interaction_vars <- c("L", "K", "r", "AUC") # fields to calculate interaction z-scores
|
||||
print_vars <- c("OrfRep", "Plate", "scan", "Col", "Row", "num", "OrfRep", "conc_num", "conc_num_factor",
|
||||
"delta_bg_tolerance", "delta_bg", "Gene", "L", "K", "r", "AUC", "NG", "DB")
|
||||
|
||||
message("Loading and filtering data for experiment: ", exp_name)
|
||||
df <- load_and_filter_data(args$easy_results_file, sd = exp_sd) %>%
|
||||
@@ -1099,7 +955,7 @@ main <- function() {
|
||||
df_no_zeros <- df_na %>% filter(L > 0) # formerly X_noZero
|
||||
|
||||
# Save some constants
|
||||
max_conc <- max(df$conc_num_factor_zeroed_num)
|
||||
max_conc <- max(df$conc_num_factor)
|
||||
l_half_median <- (median(df_above_tolerance$L, na.rm = TRUE)) / 2
|
||||
k_half_median <- (median(df_above_tolerance$K, na.rm = TRUE)) / 2
|
||||
|
||||
@@ -1107,13 +963,13 @@ main <- function() {
|
||||
df_stats <- calculate_summary_stats(
|
||||
df = df,
|
||||
variables = summary_vars,
|
||||
group_vars = c("conc_num", "conc_num_factor"))$df_with_stats
|
||||
group_vars = c("conc_num", "conc_num_factor", "conc_num_factor_factor"))$df_with_stats
|
||||
|
||||
message("Calculating summary statistics after quality control")
|
||||
ss <- calculate_summary_stats(
|
||||
df = df_na,
|
||||
variables = summary_vars,
|
||||
group_vars = c("conc_num", "conc_num_factor"))
|
||||
group_vars = c("conc_num", "conc_num_factor", "conc_num_factor_factor"))
|
||||
df_na_ss <- ss$summary_stats
|
||||
df_na_stats <- ss$df_with_stats
|
||||
write.csv(df_na_ss, file = file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
|
||||
@@ -1150,7 +1006,7 @@ main <- function() {
|
||||
df_no_zeros_stats <- calculate_summary_stats(
|
||||
df = df_no_zeros,
|
||||
variables = summary_vars,
|
||||
group_vars = c("conc_num", "conc_num_factor")
|
||||
group_vars = c("conc_num", "conc_num_factor", "conc_num_factor_factor")
|
||||
)$df_with_stats
|
||||
|
||||
message("Filtering by 2SD of K")
|
||||
@@ -1161,13 +1017,13 @@ main <- function() {
|
||||
|
||||
message("Calculating summary statistics for L within 2SD of K")
|
||||
# TODO We're omitting the original z_max calculation, not sure if needed?
|
||||
ss <- calculate_summary_stats(df_na_within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))$summary_stats
|
||||
ss <- calculate_summary_stats(df_na_within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor", "conc_num_factor_factor"))$summary_stats
|
||||
write.csv(ss,
|
||||
file = file.path(out_dir_qc, "max_observed_L_vals_for_spots_within_2sd_K.csv"),
|
||||
row.names = FALSE)
|
||||
|
||||
message("Calculating summary statistics for L outside 2SD of K")
|
||||
ss <- calculate_summary_stats(df_na_outside_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
|
||||
ss <- calculate_summary_stats(df_na_outside_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor", "conc_num_factor_factor"))
|
||||
df_na_l_outside_2sd_k_stats <- ss$df_with_stats
|
||||
write.csv(ss$summary_stats,
|
||||
file = file.path(out_dir, "max_observed_L_vals_for_spots_outside_2sd_K.csv"),
|
||||
@@ -1175,54 +1031,64 @@ main <- function() {
|
||||
|
||||
# Each plots list corresponds to a file
|
||||
l_vs_k_plot_configs <- list(
|
||||
grid_layout = list(ncol = 1, nrow = 1),
|
||||
plots = list(
|
||||
list(
|
||||
df = df,
|
||||
x_var = "L",
|
||||
y_var = "K",
|
||||
plot_type = "scatter",
|
||||
delta_bg_point = TRUE,
|
||||
tooltip_vars = c("OrfRep", "Gene", "delta_bg"),
|
||||
title = "Raw L vs K before quality control",
|
||||
color_var = "conc_num_factor_zeroed",
|
||||
color_var = "conc_num_factor_factor",
|
||||
error_bar = FALSE,
|
||||
legend_position = "right"
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
frequency_delta_bg_plot_configs <- list(
|
||||
grid_layout = list(ncol = 1, nrow = 1),
|
||||
plots = list(
|
||||
list(
|
||||
df = df_stats,
|
||||
x_var = "delta_bg",
|
||||
y_var = NULL,
|
||||
plot_type = "density",
|
||||
title = "Density plot for Delta Background by [Drug] (All Data)",
|
||||
color_var = "conc_num_factor_zeroed",
|
||||
color_var = "conc_num_factor_factor",
|
||||
x_label = "Delta Background",
|
||||
y_label = "Density",
|
||||
error_bar = FALSE,
|
||||
legend_position = "right"),
|
||||
legend_position = "right"
|
||||
),
|
||||
list(
|
||||
df = df_stats,
|
||||
x_var = "delta_bg",
|
||||
y_var = NULL,
|
||||
plot_type = "bar",
|
||||
title = "Bar plot for Delta Background by [Drug] (All Data)",
|
||||
color_var = "conc_num_factor_zeroed",
|
||||
color_var = "conc_num_factor_factor",
|
||||
x_label = "Delta Background",
|
||||
y_label = "Count",
|
||||
error_bar = FALSE,
|
||||
legend_position = "right")
|
||||
legend_position = "right"
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
above_threshold_plot_configs <- list(
|
||||
grid_layout = list(ncol = 1, nrow = 1),
|
||||
plots = list(
|
||||
list(
|
||||
df = df_above_tolerance,
|
||||
x_var = "L",
|
||||
y_var = "K",
|
||||
plot_type = "scatter",
|
||||
delta_bg_point = TRUE,
|
||||
tooltip_vars = c("OrfRep", "Gene", "delta_bg"),
|
||||
title = paste("Raw L vs K for strains above Delta Background threshold of",
|
||||
round(df_above_tolerance$delta_bg_tolerance[[1]], 3), "or above"),
|
||||
color_var = "conc_num_factor_zeroed",
|
||||
color_var = "conc_num_factor_factor",
|
||||
position = "jitter",
|
||||
annotations = list(
|
||||
list(
|
||||
@@ -1235,6 +1101,7 @@ main <- function() {
|
||||
legend_position = "right"
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
plate_analysis_plot_configs <- generate_plate_analysis_plot_configs(
|
||||
variables = summary_vars,
|
||||
@@ -1263,59 +1130,84 @@ main <- function() {
|
||||
)
|
||||
|
||||
l_outside_2sd_k_plot_configs <- list(
|
||||
grid_layout = list(ncol = 1, nrow = 1), # Ensures it's compatible with generate_and_save_plots
|
||||
plots = list(
|
||||
list(
|
||||
df = df_na_l_outside_2sd_k_stats,
|
||||
x_var = "L",
|
||||
y_var = "K",
|
||||
plot_type = "scatter",
|
||||
delta_bg_point = TRUE,
|
||||
title = "Raw L vs K for strains falling outside 2SD of the K mean at each Conc",
|
||||
color_var = "conc_num_factor_zeroed",
|
||||
position = "jitter",
|
||||
color_var = "conc_num_factor_factor",
|
||||
position = "jitter", # Apply jitter for better visibility
|
||||
tooltip_vars = c("OrfRep", "Gene", "delta_bg"),
|
||||
annotations = list(
|
||||
list(
|
||||
x = mean(df_na_l_outside_2sd_k_stats$L, na.rm = TRUE),
|
||||
y = mean(df_na_l_outside_2sd_k_stats$K, na.rm = TRUE),
|
||||
label = paste("Total strains:", nrow(df_na_l_outside_2sd_k_stats))
|
||||
)
|
||||
),
|
||||
error_bar = FALSE, # No error bars for this plot
|
||||
legend_position = "right"
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
delta_bg_outside_2sd_k_plot_configs <- list(
|
||||
grid_layout = list(ncol = 1, nrow = 1), # Ensures it's compatible with generate_and_save_plots
|
||||
plots = list(
|
||||
list(
|
||||
df = df_na_l_outside_2sd_k_stats,
|
||||
x_var = "delta_bg",
|
||||
y_var = "K",
|
||||
plot_type = "scatter",
|
||||
gene_point = TRUE,
|
||||
title = "Delta Background vs K for strains falling outside 2SD of the K mean at each Conc",
|
||||
color_var = "conc_num_factor_zeroed",
|
||||
position = "jitter",
|
||||
color_var = "conc_num_factor_factor",
|
||||
position = "jitter", # Apply jitter for better visibility
|
||||
tooltip_vars = c("OrfRep", "Gene", "delta_bg"),
|
||||
annotations = list(
|
||||
list(
|
||||
x = mean(df_na_l_outside_2sd_k_stats$delta_bg, na.rm = TRUE),
|
||||
y = mean(df_na_l_outside_2sd_k_stats$K, na.rm = TRUE),
|
||||
label = paste("Total strains:", nrow(df_na_l_outside_2sd_k_stats))
|
||||
)
|
||||
),
|
||||
error_bar = FALSE, # No error bars for this plot
|
||||
legend_position = "right"
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
message("Generating quality control plots in parallel")
|
||||
# future::plan(future::multicore, workers = parallel::detectCores())
|
||||
future::plan(future::multisession, workers = 3) # generate 3 plots in parallel
|
||||
|
||||
plot_configs <- list(
|
||||
list(out_dir = out_dir_qc, filename = "L_vs_K_before_quality_control",
|
||||
plot_configs = l_vs_k_plot_configs),
|
||||
list(out_dir = out_dir_qc, filename = "frequency_delta_background",
|
||||
plot_configs = frequency_delta_bg_plot_configs),
|
||||
list(out_dir = out_dir_qc, filename = "L_vs_K_above_threshold",
|
||||
plot_configs = above_threshold_plot_configs),
|
||||
list(out_dir = out_dir_qc, filename = "plate_analysis",
|
||||
plot_configs = plate_analysis_plot_configs),
|
||||
list(out_dir = out_dir_qc, filename = "plate_analysis_boxplots",
|
||||
plot_configs = plate_analysis_boxplot_configs),
|
||||
list(out_dir = out_dir_qc, filename = "plate_analysis_no_zeros",
|
||||
plot_configs = plate_analysis_no_zeros_plot_configs),
|
||||
list(out_dir = out_dir_qc, filename = "plate_analysis_no_zeros_boxplots",
|
||||
plot_configs = plate_analysis_no_zeros_boxplot_configs),
|
||||
list(out_dir = out_dir_qc, filename = "L_vs_K_for_strains_2SD_outside_mean_K",
|
||||
plot_configs = l_outside_2sd_k_plot_configs),
|
||||
list(out_dir = out_dir_qc, filename = "delta_background_vs_K_for_strains_2sd_outside_mean_K",
|
||||
plot_configs = delta_bg_outside_2sd_k_plot_configs)
|
||||
)
|
||||
generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plot_configs)
|
||||
quit()
|
||||
# # future::plan(future::multicore, workers = parallel::detectCores())
|
||||
# future::plan(future::multisession, workers = 3) # generate 3 plots in parallel
|
||||
|
||||
# Generating quality control plots in parallel
|
||||
# plot_configs <- list(
|
||||
# list(out_dir = out_dir_qc, filename = "L_vs_K_before_quality_control",
|
||||
# plot_configs = l_vs_k_plot_configs),
|
||||
# list(out_dir = out_dir_qc, filename = "frequency_delta_background",
|
||||
# plot_configs = frequency_delta_bg_plot_configs),
|
||||
# list(out_dir = out_dir_qc, filename = "L_vs_K_above_threshold",
|
||||
# plot_configs = above_threshold_plot_configs),
|
||||
# list(out_dir = out_dir_qc, filename = "plate_analysis",
|
||||
# plot_configs = plate_analysis_plot_configs),
|
||||
# list(out_dir = out_dir_qc, filename = "plate_analysis_boxplots",
|
||||
# plot_configs = plate_analysis_boxplot_configs),
|
||||
# list(out_dir = out_dir_qc, filename = "plate_analysis_no_zeros",
|
||||
# plot_configs = plate_analysis_no_zeros_plot_configs),
|
||||
# list(out_dir = out_dir_qc, filename = "plate_analysis_no_zeros_boxplots",
|
||||
# plot_configs = plate_analysis_no_zeros_boxplot_configs),
|
||||
# list(out_dir = out_dir_qc, filename = "L_vs_K_for_strains_2SD_outside_mean_K",
|
||||
# plot_configs = l_outside_2sd_k_plot_configs),
|
||||
# list(out_dir = out_dir_qc, filename = "delta_background_vs_K_for_strains_2sd_outside_mean_K",
|
||||
# plot_configs = delta_bg_outside_2sd_k_plot_configs)
|
||||
# )
|
||||
|
||||
# # Generating quality control plots in parallel
|
||||
# furrr::future_map(plot_configs, function(config) {
|
||||
# generate_and_save_plots(config$out_dir, config$filename, config$plot_configs)
|
||||
# }, .options = furrr_options(seed = TRUE))
|
||||
@@ -1340,7 +1232,7 @@ main <- function() {
|
||||
|
||||
# Recalculate summary statistics for the background strain
|
||||
message("Calculating summary statistics for background strain")
|
||||
ss_bg <- calculate_summary_stats(df_bg, summary_vars, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
|
||||
ss_bg <- calculate_summary_stats(df_bg, summary_vars, group_vars = c("OrfRep", "conc_num", "conc_num_factor", "conc_num_factor_factor"))
|
||||
summary_stats_bg <- ss_bg$summary_stats
|
||||
write.csv(summary_stats_bg,
|
||||
file = file.path(out_dir, paste0("summary_stats_background_strain_", strain, ".csv")),
|
||||
@@ -1351,7 +1243,7 @@ main <- function() {
|
||||
df_reference <- df_na_stats %>% # formerly X2_RF
|
||||
filter(OrfRep == strain) %>%
|
||||
filter(!is.na(L)) %>%
|
||||
group_by(conc_num, conc_num_factor) %>%
|
||||
group_by(conc_num, conc_num_factor, conc_num_factor_factor) %>%
|
||||
mutate(
|
||||
max_l_theoretical = max(max_L, na.rm = TRUE),
|
||||
L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L),
|
||||
@@ -1364,7 +1256,7 @@ main <- function() {
|
||||
filter(OrfRep != strain) %>%
|
||||
filter(!is.na(L)) %>%
|
||||
mutate(SM = 0) %>%
|
||||
group_by(conc_num, conc_num_factor) %>%
|
||||
group_by(conc_num, conc_num_factor, conc_num_factor_factor) %>%
|
||||
mutate(
|
||||
max_l_theoretical = max(max_L, na.rm = TRUE),
|
||||
L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L),
|
||||
@@ -1376,7 +1268,7 @@ main <- function() {
|
||||
df_reference_stats <- calculate_summary_stats(
|
||||
df = df_reference,
|
||||
variables = interaction_vars,
|
||||
group_vars = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor")
|
||||
group_vars = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor", "conc_num_factor_factor")
|
||||
)$df_with_stats
|
||||
reference_results <- calculate_interaction_scores(df_reference_stats, max_conc, bg_stats, group_vars = c("OrfRep", "Gene", "num"))
|
||||
zscore_calculations_reference <- reference_results$calculations
|
||||
@@ -1387,9 +1279,9 @@ main <- function() {
|
||||
df_deletion_stats <- calculate_summary_stats(
|
||||
df = df_deletion,
|
||||
variables = interaction_vars,
|
||||
group_vars = c("OrfRep", "Gene", "conc_num", "conc_num_factor")
|
||||
group_vars = c("OrfRep", "Gene", "conc_num", "conc_num_factor", "conc_num_factor_factor")
|
||||
)$df_with_stats
|
||||
deletion_results <- calculate_interaction_scores(df_deletion_stats, max_conc, bg_stats, group_vars = c("OrfRep"))
|
||||
deletion_results <- calculate_interaction_scores(df_deletion_stats, max_conc, bg_stats, group_vars = c("OrfRep", "Gene"))
|
||||
zscore_calculations <- deletion_results$calculations
|
||||
zscore_interactions <- deletion_results$interactions
|
||||
zscore_interactions_joined <- deletion_results$full_data
|
||||
|
||||
Reference in New Issue
Block a user