Correct the grouping columns used for WT summarization when scoring deletion-strain interactions

This commit is contained in:
2024-10-05 20:44:25 -04:00
parent 08bf4946e0
commit a309130c39

View File

@@ -204,14 +204,22 @@ calculate_summary_stats <- function(df, variables, group_vars) {
return(list(summary_stats = summary_stats, df_with_stats = df_joined))
}
calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshold = 2) {
calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2) {
max_conc <- max(as.numeric(df$conc_num_factor), na.rm = TRUE)
total_conc_num <- length(unique(df$conc_num))
if (type == "reference") {
bg_group_vars <- c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor", "conc_num_factor_factor")
group_vars <- c("OrfRep", "Gene", "num", "Drug")
} else if (type == "deletion") {
bg_group_vars <- c("Drug", "conc_num", "conc_num_factor", "conc_num_factor_factor")
group_vars <- c("OrfRep", "Gene", "Drug")
}
# Calculate WT statistics from df_bg
wt_stats <- df_bg %>%
group_by(OrfRep, Gene, num, Drug, conc_num, conc_num_factor, conc_num_factor_factor) %>%
group_by(across(all_of(bg_group_vars))) %>%
summarise(
WT_L = mean(mean_L, na.rm = TRUE),
WT_sd_L = mean(sd_L, na.rm = TRUE),
@@ -224,6 +232,10 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
.groups = "drop"
)
# Join WT statistics to df
df <- df %>%
left_join(wt_stats, by = bg_group_vars)
# Compute mean values at zero concentration
mean_zeroes <- df %>%
filter(conc_num == 0) %>%
@@ -236,9 +248,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
.groups = "drop"
)
# Join WT statistics to df
df <- df %>%
left_join(wt_stats, by = c(group_vars, "conc_num", "conc_num_factor", "conc_num_factor_factor")) %>%
left_join(mean_zeroes, by = c(group_vars))
# Calculate Raw Shifts and Z Shifts
@@ -702,10 +712,11 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
total_spots <- grid_layout$nrow * grid_layout$ncol
num_plots <- length(static_plots)
if (num_plots < total_spots) {
message("Filling ", total_spots - num_plots, " empty spots with nullGrob()")
static_plots <- c(static_plots, replicate(total_spots - num_plots, nullGrob(), simplify = FALSE))
}
# if (num_plots < total_spots) {
# message("Filling ", total_spots - num_plots, " empty spots with nullGrob()")
# static_plots <- c(static_plots, replicate(total_spots - num_plots, nullGrob(), simplify = FALSE))
# }
grid.arrange(
grobs = static_plots,
ncol = grid_layout$ncol,
@@ -775,7 +786,6 @@ generate_scatter_plot <- function(plot, config) {
linewidth = ifelse(!is.null(config$lm_line$linewidth), config$lm_line$linewidth, 1)
)
}
# Add SD Bands if specified
if (!is.null(config$sd_band)) {
@@ -1538,25 +1548,22 @@ main <- function() {
.groups = "drop"
)
message("Calculating reference strain interaction summary statistics") # formerly X_stats_interaction
df_reference_interaction_stats <- calculate_summary_stats(
df = df_reference,
variables = c("L", "K", "r", "AUC"),
group_vars = c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor_factor")
)$df_with_stats
# message("Calculating reference strain interaction summary statistics") # formerly X_stats_interaction
# df_reference_interaction_stats <- calculate_summary_stats(
# df = df_reference,
# variables = c("L", "K", "r", "AUC"),
# group_vars = c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor_factor")
# )$df_with_stats
message("Calculating reference strain interaction scores")
reference_results <- calculate_interaction_scores(df_reference_interaction_stats,
df_bg_stats, group_vars = c("OrfRep", "Gene", "num", "Drug"))
df_reference_calculations <- reference_results$calculations
df_reference_interactions <- reference_results$interactions
df_reference_interactions_joined <- reference_results$full_data
write.csv(df_reference_calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
# # message("Calculating reference strain interaction scores")
# reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, "reference")
# df_reference_interactions_joined <- reference_results$full_data
# write.csv(reference_results$calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
# write.csv(reference_results$interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
message("Generating reference interaction plots")
reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference")
generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16)
# # message("Generating reference interaction plots")
# reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference")
# generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16)
message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
df_deletion <- df_na_stats %>% # formerly X2
@@ -1578,13 +1585,14 @@ main <- function() {
)$df_with_stats
message("Calculating deletion strain(s) interactions scores")
deletion_results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "Drug"))
df_calculations <- deletion_results$calculations
deletion_results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, "deletion")
df_interactions <- deletion_results$interactions
df_interactions_joined <- deletion_results$full_data
write.csv(df_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE)
write.csv(deletion_results$calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE)
write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE)
print(df_interactions_joined, n = 20, width = 100000)
message("Generating deletion interaction plots")
deletion_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_interactions_joined, "deletion")
generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs, page_width = 16, page_height = 16)