Flatten means and sds for error bars
This commit is contained in:
@@ -345,7 +345,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
|
||||
ungroup()
|
||||
|
||||
# For interaction plot error bars
|
||||
delta_means <- calculations %>%
|
||||
delta_means_sds <- calculations %>%
|
||||
group_by(across(all_of(group_vars))) %>%
|
||||
summarise(
|
||||
mean_Delta_L = mean(Delta_L, na.rm = TRUE),
|
||||
@@ -356,12 +356,11 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
|
||||
sd_Delta_K = sd(Delta_K, na.rm = TRUE),
|
||||
sd_Delta_r = sd(Delta_r, na.rm = TRUE),
|
||||
sd_Delta_AUC = sd(Delta_AUC, na.rm = TRUE),
|
||||
|
||||
.groups = "drop"
|
||||
)
|
||||
|
||||
calculations <- calculations %>%
|
||||
left_join(delta_means, by = group_vars)
|
||||
left_join(delta_means_sds, by = group_vars)
|
||||
|
||||
# Summary statistics for lm scores
|
||||
lm_means_sds <- calculations %>%
|
||||
@@ -436,13 +435,12 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
|
||||
NG = first(NG),
|
||||
DB = first(DB),
|
||||
SM = first(SM),
|
||||
|
||||
.groups = "drop"
|
||||
)
|
||||
|
||||
# Add overlap threshold categories based on Z-lm and Avg-Z scores
|
||||
interactions <- interactions %>%
|
||||
filter(!is.na(Z_lm_L)) %>%
|
||||
filter(!is.na(Z_lm_L) | !is.na(Avg_Zscore_L)) %>%
|
||||
mutate(
|
||||
Overlap = case_when(
|
||||
Z_lm_L >= overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Both",
|
||||
@@ -603,7 +601,7 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
|
||||
|
||||
plot <- ggplot(df, aes_mapping) + theme_publication(legend_position = config$legend_position)
|
||||
|
||||
# Add appropriate plot layer based on plot type
|
||||
# Add appropriate plot layer or helper function based on plot type
|
||||
plot <- switch(config$plot_type,
|
||||
"scatter" = generate_scatter_plot(plot, config),
|
||||
"box" = generate_boxplot(plot, config),
|
||||
@@ -642,8 +640,19 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
|
||||
|
||||
# Add error bars if specified
|
||||
if (!is.null(config$error_bar) && config$error_bar) {
|
||||
y_mean_col <- paste0("mean_", config$y_var)
|
||||
y_sd_col <- paste0("sd_", config$y_var)
|
||||
# Check if custom columns are provided for y_mean and y_sd, or use the defaults
|
||||
y_mean_col <- if (!is.null(config$error_bar_params$y_mean_col)) {
|
||||
config$error_bar_params$y_mean_col
|
||||
} else {
|
||||
paste0("mean_", config$y_var)
|
||||
}
|
||||
|
||||
y_sd_col <- if (!is.null(config$error_bar_params$y_sd_col)) {
|
||||
config$error_bar_params$y_sd_col
|
||||
} else {
|
||||
paste0("sd_", config$y_var)
|
||||
}
|
||||
|
||||
# If color_var is provided and no fixed error bar color is set, use aes() to map color dynamically
|
||||
if (!is.null(config$color_var) && is.null(config$error_bar_params$color)) {
|
||||
plot <- plot + geom_errorbar(
|
||||
@@ -651,7 +660,7 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
|
||||
x = .data[[config$x_var]],
|
||||
ymin = .data[[y_mean_col]] - .data[[y_sd_col]],
|
||||
ymax = .data[[y_mean_col]] + .data[[y_sd_col]],
|
||||
color = .data[[config$color_var]] # Dynamic color from the data
|
||||
color = .data[[config$color_var]]
|
||||
)
|
||||
)
|
||||
} else {
|
||||
@@ -662,7 +671,17 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
|
||||
ymin = .data[[y_mean_col]] - .data[[y_sd_col]],
|
||||
ymax = .data[[y_mean_col]] + .data[[y_sd_col]]
|
||||
),
|
||||
color = config$error_bar_params$color # Fixed color
|
||||
color = config$error_bar_params$color
|
||||
)
|
||||
}
|
||||
|
||||
# Add the center point if the option is provided
|
||||
if (!is.null(config$error_bar_params$center_point)) {
|
||||
plot <- plot + geom_point(aes(
|
||||
x = .data[[config$x_var]],
|
||||
y = .data[[y_mean_col]]),
|
||||
color = config$error_bar_params$color,
|
||||
shape = 16
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -731,6 +750,10 @@ generate_scatter_plot <- function(plot, config) {
|
||||
)
|
||||
}
|
||||
|
||||
if (!is.null(config$gray_points) && config$gray_points) {
|
||||
plot <- plot + geom_point(shape = 3, color = "gray70", size = 1)
|
||||
}
|
||||
|
||||
# Add Smooth Line if specified
|
||||
if (!is.null(config$smooth) && config$smooth) {
|
||||
smooth_color <- if (!is.null(config$smooth_color)) config$smooth_color else "blue"
|
||||
@@ -742,14 +765,17 @@ generate_scatter_plot <- function(plot, config) {
|
||||
slope = config$lm_line$slope,
|
||||
color = smooth_color
|
||||
)
|
||||
} else {
|
||||
plot <- plot +
|
||||
geom_smooth(
|
||||
method = "lm",
|
||||
se = FALSE,
|
||||
color = smooth_color
|
||||
)
|
||||
}
|
||||
|
||||
# For now I want to try and hardcode it
|
||||
# else {
|
||||
# plot <- plot +
|
||||
# geom_smooth(
|
||||
# method = "lm",
|
||||
# se = FALSE,
|
||||
# color = smooth_color
|
||||
# )
|
||||
# }
|
||||
}
|
||||
|
||||
# Add SD Bands if specified
|
||||
@@ -878,7 +904,7 @@ generate_plate_analysis_plot_configs <- function(variables, df_before = NULL, df
|
||||
return(list(plots = plot_configs))
|
||||
}
|
||||
|
||||
generate_interaction_plot_configs <- function(df, type) {
|
||||
generate_interaction_plot_configs <- function(df_summary, df_interaction, type) {
|
||||
|
||||
# Define the y-limits for the plots
|
||||
limits_map <- list(
|
||||
@@ -893,7 +919,7 @@ generate_interaction_plot_configs <- function(df, type) {
|
||||
delta_plot_configs <- list()
|
||||
|
||||
# Overall statistics plots
|
||||
OrfRep <- first(df$OrfRep) # this should correspond to the reference strain
|
||||
OrfRep <- first(df_summary$OrfRep) # this should correspond to the reference strain
|
||||
|
||||
for (plot_type in c("scatter", "box")) {
|
||||
|
||||
@@ -903,15 +929,15 @@ generate_interaction_plot_configs <- function(df, type) {
|
||||
|
||||
# Common plot configuration
|
||||
plot_config <- list(
|
||||
df = df,
|
||||
df = df_summary,
|
||||
plot_type = plot_type,
|
||||
x_var = "conc_num_factor_factor",
|
||||
y_var = var,
|
||||
shape = 16,
|
||||
x_label = unique(df$Drug)[1],
|
||||
x_label = paste0("[", unique(df_summary$Drug)[1], "]"),
|
||||
coord_cartesian = y_limits,
|
||||
x_breaks = unique(df$conc_num_factor_factor),
|
||||
x_labels = as.character(unique(df$conc_num))
|
||||
x_breaks = unique(df_summary$conc_num_factor_factor),
|
||||
x_labels = as.character(unique(df_summary$conc_num))
|
||||
)
|
||||
|
||||
# Add specific configurations for scatter and box plots
|
||||
@@ -920,23 +946,25 @@ generate_interaction_plot_configs <- function(df, type) {
|
||||
plot_config$error_bar <- TRUE
|
||||
plot_config$error_bar_params <- list(
|
||||
color = "red",
|
||||
center_point = TRUE
|
||||
center_point = TRUE,
|
||||
y_mean_col = paste0("mean_mean_", var),
|
||||
y_sd_col = paste0("mean_sd_", var)
|
||||
)
|
||||
plot_config$position <- "jitter"
|
||||
|
||||
# Cannot figure out how to place these properly for discrete x-axis so let's be hacky
|
||||
annotations <- list(
|
||||
list(x = 0.25, y = y_limits[1] + 0.1 * y_span, label = " NG:"),
|
||||
list(x = 0.25, y = y_limits[1] + 0.05 * y_span, label = " DB:"),
|
||||
list(x = 0.25, y = y_limits[1], label = " SM:")
|
||||
list(x = 0.25, y = y_limits[1] + 0.08 * y_span, label = " NG =", size = 4),
|
||||
list(x = 0.25, y = y_limits[1] + 0.04 * y_span, label = " DB =", size = 4),
|
||||
list(x = 0.25, y = y_limits[1], label = " SM =", size = 4)
|
||||
)
|
||||
|
||||
for (x_val in unique(df$conc_num_factor_factor)) {
|
||||
current_df <- df %>% filter(.data[[plot_config$x_var]] == x_val)
|
||||
for (x_val in unique(df_summary$conc_num_factor_factor)) {
|
||||
current_df <- df_summary %>% filter(.data[[plot_config$x_var]] == x_val)
|
||||
annotations <- append(annotations, list(
|
||||
list(x = x_val, y = y_limits[1] + 0.1 * y_span, label = first(current_df$NG, default = 0)),
|
||||
list(x = x_val, y = y_limits[1] + 0.05 * y_span, label = first(current_df$DB, default = 0)),
|
||||
list(x = x_val, y = y_limits[1], label = first(current_df$SM, default = 0))
|
||||
list(x = x_val, y = y_limits[1] + 0.08 * y_span, label = first(current_df$NG, default = 0), size = 4),
|
||||
list(x = x_val, y = y_limits[1] + 0.04 * y_span, label = first(current_df$DB, default = 0), size = 4),
|
||||
list(x = x_val, y = y_limits[1], label = first(current_df$SM, default = 0), size = 4)
|
||||
))
|
||||
}
|
||||
|
||||
@@ -945,7 +973,7 @@ generate_interaction_plot_configs <- function(df, type) {
|
||||
stats_plot_configs <- append(stats_plot_configs, list(plot_config))
|
||||
|
||||
} else if (plot_type == "box") {
|
||||
plot_config$title <- sprintf("%s Boxplot RF for %s with SD", OrfRep, var)
|
||||
plot_config$title <- sprintf("%s Box RF for %s with SD", OrfRep, var)
|
||||
plot_config$position <- "dodge"
|
||||
|
||||
stats_boxplot_configs <- append(stats_boxplot_configs, list(plot_config))
|
||||
@@ -967,7 +995,7 @@ generate_interaction_plot_configs <- function(df, type) {
|
||||
AUC = c(-6000, 6000)
|
||||
)
|
||||
|
||||
grouped_data <- df %>%
|
||||
grouped_data <- df_interaction %>%
|
||||
group_by(across(all_of(group_vars))) %>%
|
||||
group_split()
|
||||
|
||||
@@ -1005,9 +1033,10 @@ generate_interaction_plot_configs <- function(df, type) {
|
||||
plot_type = "scatter",
|
||||
x_var = "conc_num_factor_factor",
|
||||
y_var = paste0("Delta_", var),
|
||||
x_label = unique(group_data$Drug)[1],
|
||||
x_label = paste0("[", unique(df_summary$Drug)[1], "]"),
|
||||
shape = 16,
|
||||
title = paste(OrfRepTitle, Gene, sep = " "),
|
||||
title_size = rel(1.2),
|
||||
title_size = rel(1.3),
|
||||
coord_cartesian = y_limits,
|
||||
annotations = list(
|
||||
list(x = 1, y = y_limits[2] - 0.2 * y_span, label = paste("ZShift =", Z_Shift_value)),
|
||||
@@ -1021,16 +1050,17 @@ generate_interaction_plot_configs <- function(df, type) {
|
||||
error_bar_params = list(
|
||||
ymin = 0 - (2 * WT_sd_value),
|
||||
ymax = 0 + (2 * WT_sd_value),
|
||||
color = "black"
|
||||
color = "gray"
|
||||
),
|
||||
smooth = TRUE,
|
||||
x_breaks = unique(group_data$conc_num_factor_factor),
|
||||
x_labels = as.character(unique(group_data$conc_num)),
|
||||
ylim_vals = y_limits,
|
||||
y_filter = FALSE,
|
||||
smooth = TRUE,
|
||||
lm_line = list(
|
||||
intercept = lm_intercept_value,
|
||||
slope = lm_slope_value
|
||||
slope = lm_slope_value,
|
||||
color = "blue"
|
||||
)
|
||||
)
|
||||
delta_plot_configs <- append(delta_plot_configs, list(plot_config))
|
||||
@@ -1082,7 +1112,6 @@ generate_rank_plot_configs <- function(df, is_lm = FALSE, adjust = FALSE, overla
|
||||
fill_negative = "orange",
|
||||
alpha_positive = 0.3,
|
||||
alpha_negative = 0.3,
|
||||
annotations = NULL,
|
||||
shape = 3,
|
||||
size = 0.1,
|
||||
y_label = y_label,
|
||||
@@ -1183,7 +1212,8 @@ generate_correlation_plot_configs <- function(df) {
|
||||
shape = 3,
|
||||
size = 0.5,
|
||||
color_var = "Overlap",
|
||||
cyan_points = highlight_cyan # Include cyan points or not based on the loop
|
||||
cyan_points = highlight_cyan, # include cyan points or not based on the loop
|
||||
gray_points = TRUE
|
||||
)
|
||||
|
||||
plot_configs <- append(plot_configs, list(plot_config))
|
||||
@@ -1226,10 +1256,10 @@ main <- function() {
|
||||
)
|
||||
|
||||
message("Calculating summary statistics before quality control")
|
||||
df_stats <- calculate_summary_stats(
|
||||
df_stats <- calculate_summary_stats( # formerly X_stats_ALL
|
||||
df = df,
|
||||
variables = c("L", "K", "r", "AUC", "delta_bg"),
|
||||
group_vars = c("conc_num"))$df_with_stats
|
||||
group_vars = c("conc_num", "conc_num_factor_factor"))$df_with_stats
|
||||
|
||||
frequency_delta_bg_plot_configs <- list(
|
||||
plots = list(
|
||||
@@ -1295,7 +1325,7 @@ main <- function() {
|
||||
ss <- calculate_summary_stats(
|
||||
df = df_na,
|
||||
variables = c("L", "K", "r", "AUC", "delta_bg"),
|
||||
group_vars = c("conc_num"))
|
||||
group_vars = c("conc_num", "conc_num_factor_factor"))
|
||||
df_na_ss <- ss$summary_stats
|
||||
df_na_stats <- ss$df_with_stats # formerly X_stats_ALL
|
||||
write.csv(df_na_ss, file = file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
|
||||
@@ -1307,7 +1337,7 @@ main <- function() {
|
||||
df_no_zeros_stats <- calculate_summary_stats(
|
||||
df = df_no_zeros,
|
||||
variables = c("L", "K", "r", "AUC", "delta_bg"),
|
||||
group_vars = c("conc_num")
|
||||
group_vars = c("conc_num", "conc_num_factor_factor")
|
||||
)$df_with_stats
|
||||
|
||||
message("Filtering by 2SD of K")
|
||||
@@ -1318,14 +1348,15 @@ main <- function() {
|
||||
|
||||
message("Calculating summary statistics for L within 2SD of K")
|
||||
# TODO We're omitting the original z_max calculation, not sure if needed?
|
||||
ss <- calculate_summary_stats(df_na_within_2sd_k, "L",
|
||||
group_vars = c("conc_num"))$summary_stats
|
||||
ss <- calculate_summary_stats(df_na_within_2sd_k, "L", # formerly X_stats_BY_L_within_2SD_K
|
||||
group_vars = c("conc_num", "conc_num_factor_factor"))$summary_stats
|
||||
write.csv(ss,
|
||||
file = file.path(out_dir_qc, "max_observed_L_vals_for_spots_within_2SD_K.csv"),
|
||||
row.names = FALSE)
|
||||
|
||||
message("Calculating summary statistics for L outside 2SD of K")
|
||||
ss <- calculate_summary_stats(df_na_outside_2sd_k, "L", group_vars = c("conc_num"))
|
||||
ss <- calculate_summary_stats(df_na_outside_2sd_k, "L", # formerly X_stats_BY_L_outside_2SD_K
|
||||
group_vars = c("conc_num", "conc_num_factor_factor"))
|
||||
df_na_l_outside_2sd_k_stats <- ss$df_with_stats
|
||||
write.csv(ss$summary_stats,
|
||||
file = file.path(out_dir, "max_observed_L_vals_for_spots_outside_2SD_K.csv"),
|
||||
@@ -1435,10 +1466,10 @@ main <- function() {
|
||||
)
|
||||
|
||||
# Parallelize background and quality control plot generation
|
||||
furrr::future_map(plot_configs, function(config) {
|
||||
generate_and_save_plots(config$out_dir, config$filename, config$plot_configs,
|
||||
page_width = config$page_width, page_height = config$page_height)
|
||||
}, .options = furrr_options(seed = TRUE))
|
||||
# furrr::future_map(plot_configs, function(config) {
|
||||
# generate_and_save_plots(config$out_dir, config$filename, config$plot_configs,
|
||||
# page_width = config$page_width, page_height = config$page_height)
|
||||
# }, .options = furrr_options(seed = TRUE))
|
||||
|
||||
# Loop over background strains
|
||||
# TODO currently only tested against one strain, if we want to do multiple strains we'll
|
||||
@@ -1460,7 +1491,7 @@ main <- function() {
|
||||
filter(!is.na(L))
|
||||
|
||||
message("Calculating background strain summary statistics")
|
||||
ss_bg <- calculate_summary_stats(df_bg, c("L", "K", "r", "AUC", "delta_bg"),
|
||||
ss_bg <- calculate_summary_stats(df_bg, c("L", "K", "r", "AUC", "delta_bg"), # formerly X_stats_BY
|
||||
group_vars = c("OrfRep", "Drug", "conc_num", "conc_num_factor_factor"))
|
||||
summary_stats_bg <- ss_bg$summary_stats
|
||||
df_bg_stats <- ss_bg$df_with_stats
|
||||
@@ -1473,7 +1504,7 @@ main <- function() {
|
||||
df_reference <- df_na_stats %>% # formerly X2_RF
|
||||
filter(OrfRep == strain) %>%
|
||||
filter(!is.na(L)) %>%
|
||||
group_by(OrfRep, Drug, conc_num) %>%
|
||||
group_by(OrfRep, Drug, conc_num, conc_num_factor_factor) %>%
|
||||
mutate(
|
||||
max_l_theoretical = max(max_L, na.rm = TRUE),
|
||||
L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L),
|
||||
@@ -1482,22 +1513,45 @@ main <- function() {
|
||||
ungroup()
|
||||
|
||||
message("Calculating reference strain summary statistics")
|
||||
df_reference_stats <- calculate_summary_stats(
|
||||
df_reference_summary_stats <- calculate_summary_stats( # formerly X_stats_X2_RF
|
||||
df = df_reference,
|
||||
variables = c("L", "K", "r", "AUC"),
|
||||
group_vars = c("OrfRep", "Gene", "Drug", "num", "conc_num", "conc_num_factor_factor")
|
||||
group_vars = c("OrfRep", "Drug", "conc_num", "conc_num_factor_factor")
|
||||
)$df_with_stats
|
||||
|
||||
# Summarise statistics for error bars
|
||||
df_reference_summary_stats <- df_reference_summary_stats %>%
|
||||
group_by(OrfRep, Drug, conc_num, conc_num_factor_factor) %>%
|
||||
mutate(
|
||||
mean_mean_L = first(mean_L),
|
||||
mean_sd_L = first(sd_L),
|
||||
mean_mean_K = first(mean_K),
|
||||
mean_sd_K = first(sd_K),
|
||||
mean_mean_r = first(mean_r),
|
||||
mean_sd_r = first(sd_r),
|
||||
mean_mean_AUC = first(mean_AUC),
|
||||
mean_sd_AUC = first(sd_AUC),
|
||||
.groups = "drop"
|
||||
)
|
||||
|
||||
message("Calculating reference strain interaction summary statistics") # formerly X_stats_interaction
|
||||
df_reference_interaction_stats <- calculate_summary_stats(
|
||||
df = df_reference,
|
||||
variables = c("L", "K", "r", "AUC"),
|
||||
group_vars = c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor_factor")
|
||||
)$df_with_stats
|
||||
|
||||
message("Calculating reference strain interaction scores")
|
||||
results <- calculate_interaction_scores(df_reference_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "Drug", "num"))
|
||||
df_calculations_reference <- results$calculations
|
||||
df_interactions_reference <- results$interactions
|
||||
df_interactions_reference_joined <- results$full_data
|
||||
write.csv(df_calculations_reference, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
|
||||
write.csv(df_interactions_reference, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
|
||||
results <- calculate_interaction_scores(df_reference_interaction_stats,
|
||||
df_bg_stats, group_vars = c("OrfRep", "Gene", "num", "Drug"))
|
||||
df_reference_calculations <- results$calculations
|
||||
df_reference_interactions <- results$interactions
|
||||
df_reference_interactions_joined <- results$full_data
|
||||
write.csv(df_reference_calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
|
||||
write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
|
||||
|
||||
message("Generating reference interaction plots")
|
||||
reference_plot_configs <- generate_interaction_plot_configs(df_interactions_reference_joined, "reference")
|
||||
reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference")
|
||||
generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16)
|
||||
|
||||
message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
|
||||
@@ -1512,7 +1566,7 @@ main <- function() {
|
||||
L = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, max_l_theoretical, L)) %>%
|
||||
ungroup()
|
||||
|
||||
message("Calculating deletion strain(s) summary statistics")
|
||||
message("Calculating deletion strain(s) interaction summary statistics")
|
||||
df_deletion_stats <- calculate_summary_stats(
|
||||
df = df_deletion,
|
||||
variables = c("L", "K", "r", "AUC"),
|
||||
@@ -1528,7 +1582,7 @@ main <- function() {
|
||||
write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE)
|
||||
|
||||
message("Generating deletion interaction plots")
|
||||
deletion_plot_configs <- generate_interaction_plot_configs(df_interactions_joined, "deletion")
|
||||
deletion_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_interactions_joined, "deletion")
|
||||
generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs, page_width = 16, page_height = 16)
|
||||
|
||||
message("Writing enhancer/suppressor csv files")
|
||||
|
||||
Reference in New Issue
Block a user