Correct the grouping variables used when summarizing WT (background) statistics for deletion strains

This commit is contained in:
2024-10-05 20:44:25 -04:00
parent 08bf4946e0
commit a309130c39

View File

@@ -204,14 +204,22 @@ calculate_summary_stats <- function(df, variables, group_vars) {
return(list(summary_stats = summary_stats, df_with_stats = df_joined)) return(list(summary_stats = summary_stats, df_with_stats = df_joined))
} }
calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshold = 2) { calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2) {
max_conc <- max(as.numeric(df$conc_num_factor), na.rm = TRUE) max_conc <- max(as.numeric(df$conc_num_factor), na.rm = TRUE)
total_conc_num <- length(unique(df$conc_num)) total_conc_num <- length(unique(df$conc_num))
if (type == "reference") {
bg_group_vars <- c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor", "conc_num_factor_factor")
group_vars <- c("OrfRep", "Gene", "num", "Drug")
} else if (type == "deletion") {
bg_group_vars <- c("Drug", "conc_num", "conc_num_factor", "conc_num_factor_factor")
group_vars <- c("OrfRep", "Gene", "Drug")
}
# Calculate WT statistics from df_bg # Calculate WT statistics from df_bg
wt_stats <- df_bg %>% wt_stats <- df_bg %>%
group_by(OrfRep, Gene, num, Drug, conc_num, conc_num_factor, conc_num_factor_factor) %>% group_by(across(all_of(bg_group_vars))) %>%
summarise( summarise(
WT_L = mean(mean_L, na.rm = TRUE), WT_L = mean(mean_L, na.rm = TRUE),
WT_sd_L = mean(sd_L, na.rm = TRUE), WT_sd_L = mean(sd_L, na.rm = TRUE),
@@ -224,6 +232,10 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
.groups = "drop" .groups = "drop"
) )
# Join WT statistics to df
df <- df %>%
left_join(wt_stats, by = bg_group_vars)
# Compute mean values at zero concentration # Compute mean values at zero concentration
mean_zeroes <- df %>% mean_zeroes <- df %>%
filter(conc_num == 0) %>% filter(conc_num == 0) %>%
@@ -236,9 +248,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
.groups = "drop" .groups = "drop"
) )
# Join WT statistics to df
df <- df %>% df <- df %>%
left_join(wt_stats, by = c(group_vars, "conc_num", "conc_num_factor", "conc_num_factor_factor")) %>%
left_join(mean_zeroes, by = c(group_vars)) left_join(mean_zeroes, by = c(group_vars))
# Calculate Raw Shifts and Z Shifts # Calculate Raw Shifts and Z Shifts
@@ -702,10 +712,11 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
total_spots <- grid_layout$nrow * grid_layout$ncol total_spots <- grid_layout$nrow * grid_layout$ncol
num_plots <- length(static_plots) num_plots <- length(static_plots)
if (num_plots < total_spots) { # if (num_plots < total_spots) {
message("Filling ", total_spots - num_plots, " empty spots with nullGrob()") # message("Filling ", total_spots - num_plots, " empty spots with nullGrob()")
static_plots <- c(static_plots, replicate(total_spots - num_plots, nullGrob(), simplify = FALSE)) # static_plots <- c(static_plots, replicate(total_spots - num_plots, nullGrob(), simplify = FALSE))
} # }
grid.arrange( grid.arrange(
grobs = static_plots, grobs = static_plots,
ncol = grid_layout$ncol, ncol = grid_layout$ncol,
@@ -776,7 +787,6 @@ generate_scatter_plot <- function(plot, config) {
) )
} }
# Add SD Bands if specified # Add SD Bands if specified
if (!is.null(config$sd_band)) { if (!is.null(config$sd_band)) {
plot <- plot + plot <- plot +
@@ -1538,25 +1548,22 @@ main <- function() {
.groups = "drop" .groups = "drop"
) )
message("Calculating reference strain interaction summary statistics") # formerly X_stats_interaction # message("Calculating reference strain interaction summary statistics") # formerly X_stats_interaction
df_reference_interaction_stats <- calculate_summary_stats( # df_reference_interaction_stats <- calculate_summary_stats(
df = df_reference, # df = df_reference,
variables = c("L", "K", "r", "AUC"), # variables = c("L", "K", "r", "AUC"),
group_vars = c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor_factor") # group_vars = c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor_factor")
)$df_with_stats # )$df_with_stats
message("Calculating reference strain interaction scores") # # message("Calculating reference strain interaction scores")
reference_results <- calculate_interaction_scores(df_reference_interaction_stats, # reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, "reference")
df_bg_stats, group_vars = c("OrfRep", "Gene", "num", "Drug")) # df_reference_interactions_joined <- reference_results$full_data
df_reference_calculations <- reference_results$calculations # write.csv(reference_results$calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
df_reference_interactions <- reference_results$interactions # write.csv(reference_results$interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
df_reference_interactions_joined <- reference_results$full_data
write.csv(df_reference_calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
message("Generating reference interaction plots") # # message("Generating reference interaction plots")
reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference") # reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference")
generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16) # generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16)
message("Setting missing deletion values to the highest theoretical value at each drug conc for L") message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
df_deletion <- df_na_stats %>% # formerly X2 df_deletion <- df_na_stats %>% # formerly X2
@@ -1578,13 +1585,14 @@ main <- function() {
)$df_with_stats )$df_with_stats
message("Calculating deletion strain(s) interactions scores") message("Calculating deletion strain(s) interactions scores")
deletion_results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "Drug")) deletion_results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, "deletion")
df_calculations <- deletion_results$calculations
df_interactions <- deletion_results$interactions df_interactions <- deletion_results$interactions
df_interactions_joined <- deletion_results$full_data df_interactions_joined <- deletion_results$full_data
write.csv(df_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE) write.csv(deletion_results$calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE)
write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE) write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE)
print(df_interactions_joined, n = 20, width = 100000)
message("Generating deletion interaction plots") message("Generating deletion interaction plots")
deletion_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_interactions_joined, "deletion") deletion_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_interactions_joined, "deletion")
generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs, page_width = 16, page_height = 16) generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs, page_width = 16, page_height = 16)