Regroup calculations

This commit is contained in:
2024-10-05 14:24:53 -04:00
parent f207e40efd
commit ddcc26050f

View File

@@ -210,7 +210,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
# Calculate WT statistics from df_bg # Calculate WT statistics from df_bg
wt_stats <- df_bg %>% wt_stats <- df_bg %>%
group_by(across(all_of(group_vars)), conc_num, conc_num_factor_factor) %>% group_by(OrfRep, Gene, num, Drug, conc_num, conc_num_factor, conc_num_factor_factor) %>%
summarise( summarise(
WT_L = mean(mean_L, na.rm = TRUE), WT_L = mean(mean_L, na.rm = TRUE),
WT_sd_L = mean(sd_L, na.rm = TRUE), WT_sd_L = mean(sd_L, na.rm = TRUE),
@@ -223,12 +223,8 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
.groups = "drop" .groups = "drop"
) )
# Join WT stats back to df
df <- df %>%
left_join(wt_stats, by = c(group_vars, "conc_num", "conc_num_factor_factor"))
# Compute mean values at zero concentration # Compute mean values at zero concentration
mean_L_zero_df <- df %>% mean_zeroes <- df %>%
filter(conc_num == 0) %>% filter(conc_num == 0) %>%
group_by(across(all_of(group_vars))) %>% group_by(across(all_of(group_vars))) %>%
summarise( summarise(
@@ -239,10 +235,11 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
.groups = "drop" .groups = "drop"
) )
# Join mean_L_zero_df to df # Join WT statistics to df
df <- df %>% df <- df %>%
left_join(mean_L_zero_df, by = group_vars) left_join(wt_stats, by = c(group_vars, "conc_num", "conc_num_factor", "conc_num_factor_factor")) %>%
left_join(mean_zeroes, by = c(group_vars))
# Calculate Raw Shifts and Z Shifts # Calculate Raw Shifts and Z Shifts
df <- df %>% df <- df %>%
mutate( mutate(
@@ -257,7 +254,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
) )
calculations <- df %>% calculations <- df %>%
group_by(across(all_of(group_vars))) %>% group_by(across(all_of(c(group_vars, "conc_num", "conc_num_factor", "conc_num_factor_factor")))) %>%
mutate( mutate(
NG_sum = sum(NG, na.rm = TRUE), NG_sum = sum(NG, na.rm = TRUE),
DB_sum = sum(DB, na.rm = TRUE), DB_sum = sum(DB, na.rm = TRUE),
@@ -289,6 +286,8 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
Zscore_r = Delta_r / WT_sd_r, Zscore_r = Delta_r / WT_sd_r,
Zscore_AUC = Delta_AUC / WT_sd_AUC Zscore_AUC = Delta_AUC / WT_sd_AUC
) %>% ) %>%
ungroup() %>% # Ungroup before group_modify
group_by(across(all_of(group_vars))) %>%
group_modify(~ { group_modify(~ {
# Perform linear models only if there are enough unique conc_num_factor levels # Perform linear models only if there are enough unique conc_num_factor levels
if (length(unique(.x$conc_num_factor)) > 1) { if (length(unique(.x$conc_num_factor)) > 1) {
@@ -354,8 +353,8 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
left_join(delta_means_sds, by = group_vars) left_join(delta_means_sds, by = group_vars)
# Summary statistics for lm scores # Summary statistics for lm scores
lm_means_sds <- calculations %>% calculations <- calculations %>%
summarise( mutate(
lm_mean_L = mean(lm_Score_L, na.rm = TRUE), lm_mean_L = mean(lm_Score_L, na.rm = TRUE),
lm_sd_L = sd(lm_Score_L, na.rm = TRUE), lm_sd_L = sd(lm_Score_L, na.rm = TRUE),
lm_mean_K = mean(lm_Score_K, na.rm = TRUE), lm_mean_K = mean(lm_Score_K, na.rm = TRUE),
@@ -363,25 +362,9 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol
lm_mean_r = mean(lm_Score_r, na.rm = TRUE), lm_mean_r = mean(lm_Score_r, na.rm = TRUE),
lm_sd_r = sd(lm_Score_r, na.rm = TRUE), lm_sd_r = sd(lm_Score_r, na.rm = TRUE),
lm_mean_AUC = mean(lm_Score_AUC, na.rm = TRUE), lm_mean_AUC = mean(lm_Score_AUC, na.rm = TRUE),
lm_sd_AUC = sd(lm_Score_AUC, na.rm = TRUE), lm_sd_AUC = sd(lm_Score_AUC, na.rm = TRUE)
.groups = "drop" ) %>%
) # Calculate Z-lm scores
# Add lm score means and standard deviations to calculations
calculations <- calculations %>%
mutate(
lm_mean_L = lm_means_sds$lm_mean_L,
lm_sd_L = lm_means_sds$lm_sd_L,
lm_mean_K = lm_means_sds$lm_mean_K,
lm_sd_K = lm_means_sds$lm_sd_K,
lm_mean_r = lm_means_sds$lm_mean_r,
lm_sd_r = lm_means_sds$lm_sd_r,
lm_mean_AUC = lm_means_sds$lm_mean_AUC,
lm_sd_AUC = lm_means_sds$lm_sd_AUC
)
# Calculate Z-lm scores
calculations <- calculations %>%
mutate( mutate(
Z_lm_L = (lm_Score_L - lm_mean_L) / lm_sd_L, Z_lm_L = (lm_Score_L - lm_mean_L) / lm_sd_L,
Z_lm_K = (lm_Score_K - lm_mean_K) / lm_sd_K, Z_lm_K = (lm_Score_K - lm_mean_K) / lm_sd_K,
@@ -534,7 +517,7 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
if (!is.null(grid_layout$ncol) && is.null(grid_layout$nrow)) { if (!is.null(grid_layout$ncol) && is.null(grid_layout$nrow)) {
num_plots <- length(plots) num_plots <- length(plots)
nrow <- ceiling(num_plots / grid_layout$ncol) nrow <- ceiling(num_plots / grid_layout$ncol)
message("No nrow provided, automatically using nrow = ", nrow) # message("No nrow provided, automatically using nrow = ", nrow)
grid_layout$nrow <- nrow grid_layout$nrow <- nrow
} }
@@ -563,11 +546,11 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
# Print rows being filtered out # Print rows being filtered out
if (nrow(out_of_bounds_df) > 0) { if (nrow(out_of_bounds_df) > 0) {
message("Filtered: ", config$title, "using y-limits: [", config$ylim_vals[1], ", ", config$ylim_vals[2], "]")
message("# of filtered rows outside y-limits (for plotting): ", nrow(out_of_bounds_df)) message("# of filtered rows outside y-limits (for plotting): ", nrow(out_of_bounds_df))
# print(out_of_bounds_df) # print(out_of_bounds_df)
} }
# Filter the valid data for plotting
df <- df %>% df <- df %>%
filter( filter(
!is.na(.data[[config$y_var]]) & !is.na(.data[[config$y_var]]) &
@@ -599,6 +582,15 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
} }
} }
# Create a null plot with a "No data" message if no rows remain
# if (nrow(df) == 0) {
# plot <- ggplot() +
# geom_text(aes(0.5, 0.5), label = "No data available", size = 5) +
# theme_void() + ggtitle(config$title)
# } else {
# plot <- ggplot(df, aes_mapping) + theme_publication(legend_position = config$legend_position)
# }
plot <- ggplot(df, aes_mapping) + theme_publication(legend_position = config$legend_position) plot <- ggplot(df, aes_mapping) + theme_publication(legend_position = config$legend_position)
# Add appropriate plot layer or helper function based on plot type # Add appropriate plot layer or helper function based on plot type
@@ -665,7 +657,7 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
ymax = !!custom_ymax_expr ymax = !!custom_ymax_expr
), ),
color = config$error_bar_params$color, color = config$error_bar_params$color,
linewidth = 0.1 linewidth = ifelse(is.null(config$error_bar_params$linewidth), 0.1, config$error_bar_params$linewidth)
) )
} else { } else {
@@ -688,7 +680,7 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
ymax = .data[[y_mean_col]] + .data[[y_sd_col]] ymax = .data[[y_mean_col]] + .data[[y_sd_col]]
), ),
color = config$error_bar_params$color, color = config$error_bar_params$color,
linewidth = 0.1 linewidth = ifelse(is.null(config$error_bar_params$linewidth), 0.1, config$error_bar_params$linewidth)
) )
} }
} }
@@ -793,7 +785,8 @@ generate_scatter_plot <- function(plot, config) {
geom_abline( geom_abline(
intercept = config$lm_line$intercept, intercept = config$lm_line$intercept,
slope = config$lm_line$slope, slope = config$lm_line$slope,
color = smooth_color color = smooth_color,
linewidth = 1
) )
} }
} }
@@ -1072,7 +1065,8 @@ generate_interaction_plot_configs <- function(df_summary, df_interaction, type)
ymin = paste0("0 - 2 * WT_sd_", var), ymin = paste0("0 - 2 * WT_sd_", var),
ymax = paste0("0 + 2 * WT_sd_", var) ymax = paste0("0 + 2 * WT_sd_", var)
), ),
color = "gray" color = "gray70",
linewidth = 0.5
), ),
x_breaks = unique(group_data$conc_num_factor_factor), x_breaks = unique(group_data$conc_num_factor_factor),
x_labels = as.character(unique(group_data$conc_num)), x_labels = as.character(unique(group_data$conc_num)),
@@ -1564,7 +1558,8 @@ main <- function() {
)$df_with_stats )$df_with_stats
message("Calculating reference strain interaction scores") message("Calculating reference strain interaction scores")
reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "num", "Drug")) reference_results <- calculate_interaction_scores(df_reference_interaction_stats,
df_bg_stats, group_vars = c("OrfRep", "Gene", "num", "Drug"))
df_reference_calculations <- reference_results$calculations df_reference_calculations <- reference_results$calculations
df_reference_interactions <- reference_results$interactions df_reference_interactions <- reference_results$interactions
df_reference_interactions_joined <- reference_results$full_data df_reference_interactions_joined <- reference_results$full_data