Move error bars to generate_scattter_plots()

This commit is contained in:
2024-10-06 14:58:54 -04:00
parent bee9aea866
commit faa82e0af4

View File

@@ -300,52 +300,45 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
ungroup() %>% # Ungroup before group_modify ungroup() %>% # Ungroup before group_modify
group_by(across(all_of(group_vars))) %>% group_by(across(all_of(group_vars))) %>%
group_modify(~ { group_modify(~ {
# Check if there are enough unique conc_num_factor levels to perform lm
if (length(unique(.x$conc_num_factor)) > 1) {
# Perform linear modeling # Filter each column for valid data or else linear modeling will fail
lm_L <- lm(Delta_L ~ conc_num_factor, data = .x) valid_data_L <- .x %>% filter(!is.na(Delta_L))
lm_K <- lm(Delta_K ~ conc_num_factor, data = .x) valid_data_K <- .x %>% filter(!is.na(Delta_K))
lm_r <- lm(Delta_r ~ conc_num_factor, data = .x) valid_data_r <- .x %>% filter(!is.na(Delta_r))
lm_AUC <- lm(Delta_AUC ~ conc_num_factor, data = .x) valid_data_AUC <- .x %>% filter(!is.na(Delta_AUC))
# If the model fails, set model-related values to NA # Perform linear modeling
.x %>% lm_L <- if (nrow(valid_data_L) > 1) lm(Delta_L ~ conc_num_factor, data = valid_data_L) else NULL
mutate( lm_K <- if (nrow(valid_data_K) > 1) lm(Delta_K ~ conc_num_factor, data = valid_data_K) else NULL
lm_intercept_L = ifelse(!is.null(lm_L), coef(lm_L)[1], NA), lm_r <- if (nrow(valid_data_r) > 1) lm(Delta_r ~ conc_num_factor, data = valid_data_r) else NULL
lm_slope_L = ifelse(!is.null(lm_L), coef(lm_L)[2], NA), lm_AUC <- if (nrow(valid_data_AUC) > 1) lm(Delta_AUC ~ conc_num_factor, data = valid_data_AUC) else NULL
R_Squared_L = ifelse(!is.null(lm_L), summary(lm_L)$r.squared, NA),
lm_Score_L = ifelse(!is.null(lm_L), max_conc * coef(lm_L)[2] + coef(lm_L)[1], NA),
lm_intercept_K = ifelse(!is.null(lm_K), coef(lm_K)[1], NA), # Extract coefficients for calculations and plotting
lm_slope_K = ifelse(!is.null(lm_K), coef(lm_K)[2], NA), .x %>%
R_Squared_K = ifelse(!is.null(lm_K), summary(lm_K)$r.squared, NA), mutate(
lm_Score_K = ifelse(!is.null(lm_K), max_conc * coef(lm_K)[2] + coef(lm_K)[1], NA), lm_intercept_L = if (!is.null(lm_L)) coef(lm_L)[1] else NA,
lm_slope_L = if (!is.null(lm_L)) coef(lm_L)[2] else NA,
R_Squared_L = if (!is.null(lm_L)) summary(lm_L)$r.squared else NA,
lm_Score_L = if (!is.null(lm_L)) max_conc * coef(lm_L)[2] + coef(lm_L)[1] else NA,
lm_intercept_r = ifelse(!is.null(lm_r), coef(lm_r)[1], NA), lm_intercept_K = if (!is.null(lm_K)) coef(lm_K)[1] else NA,
lm_slope_r = ifelse(!is.null(lm_r), coef(lm_r)[2], NA), lm_slope_K = if (!is.null(lm_K)) coef(lm_K)[2] else NA,
R_Squared_r = ifelse(!is.null(lm_r), summary(lm_r)$r.squared, NA), R_Squared_K = if (!is.null(lm_K)) summary(lm_K)$r.squared else NA,
lm_Score_r = ifelse(!is.null(lm_r), max_conc * coef(lm_r)[2] + coef(lm_r)[1], NA), lm_Score_K = if (!is.null(lm_K)) max_conc * coef(lm_K)[2] + coef(lm_K)[1] else NA,
lm_intercept_AUC = ifelse(!is.null(lm_AUC), coef(lm_AUC)[1], NA), lm_intercept_r = if (!is.null(lm_r)) coef(lm_r)[1] else NA,
lm_slope_AUC = ifelse(!is.null(lm_AUC), coef(lm_AUC)[2], NA), lm_slope_r = if (!is.null(lm_r)) coef(lm_r)[2] else NA,
R_Squared_AUC = ifelse(!is.null(lm_AUC), summary(lm_AUC)$r.squared, NA), R_Squared_r = if (!is.null(lm_r)) summary(lm_r)$r.squared else NA,
lm_Score_AUC = ifelse(!is.null(lm_AUC), max_conc * coef(lm_AUC)[2] + coef(lm_AUC)[1], NA) lm_Score_r = if (!is.null(lm_r)) max_conc * coef(lm_r)[2] + coef(lm_r)[1] else NA,
)
} else { lm_intercept_AUC = if (!is.null(lm_AUC)) coef(lm_AUC)[1] else NA,
# If not enough conc_num_factor levels, set lm-related values to NA lm_slope_AUC = if (!is.null(lm_AUC)) coef(lm_AUC)[2] else NA,
.x %>% R_Squared_AUC = if (!is.null(lm_AUC)) summary(lm_AUC)$r.squared else NA,
mutate( lm_Score_AUC = if (!is.null(lm_AUC)) max_conc * coef(lm_AUC)[2] + coef(lm_AUC)[1] else NA
lm_intercept_L = NA, lm_slope_L = NA, R_Squared_L = NA, lm_Score_L = NA, )
lm_intercept_K = NA, lm_slope_K = NA, R_Squared_K = NA, lm_Score_K = NA,
lm_intercept_r = NA, lm_slope_r = NA, R_Squared_r = NA, lm_Score_r = NA,
lm_intercept_AUC = NA, lm_slope_AUC = NA, R_Squared_AUC = NA, lm_Score_AUC = NA
)
}
}) %>% }) %>%
ungroup() ungroup()
# For interaction plot error bars # For interaction plot error bars
delta_means_sds <- calculations %>% delta_means_sds <- calculations %>%
group_by(across(all_of(group_vars))) %>% group_by(across(all_of(group_vars))) %>%
@@ -631,78 +624,6 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
} }
} }
# Add error bars if specified
if (!is.null(config$error_bar) && config$error_bar) {
# Check if custom columns are provided for y_mean and y_sd, or use the defaults
y_mean_col <- if (!is.null(config$error_bar_params$y_mean_col)) {
config$error_bar_params$y_mean_col
} else {
paste0("mean_", config$y_var)
}
y_sd_col <- if (!is.null(config$error_bar_params$y_sd_col)) {
config$error_bar_params$y_sd_col
} else {
paste0("sd_", config$y_var)
}
# Use rlang to handle custom error bar calculations
if (!is.null(config$error_bar_params$custom_error_bar)) {
custom_ymin_expr <- rlang::parse_expr(config$error_bar_params$custom_error_bar$ymin)
custom_ymax_expr <- rlang::parse_expr(config$error_bar_params$custom_error_bar$ymax)
plot <- plot + geom_errorbar(
aes(
ymin = !!custom_ymin_expr,
ymax = !!custom_ymax_expr
),
color = config$error_bar_params$color,
linewidth = ifelse(is.null(config$error_bar_params$linewidth), 0.1, config$error_bar_params$linewidth)
)
} else {
# If no custom error bar formula, use the default or dynamic ones
if (!is.null(config$color_var) && config$color_var %in% colnames(config$df)) {
# Only use color_var if it's present in the dataframe
plot <- plot + geom_errorbar(
aes(
ymin = .data[[y_mean_col]] - .data[[y_sd_col]],
ymax = .data[[y_mean_col]] + .data[[y_sd_col]],
color = .data[[config$color_var]]
),
linewidth = 0.1
)
} else {
# If color_var is missing, fall back to a default color or none
plot <- plot + geom_errorbar(
aes(
ymin = .data[[y_mean_col]] - .data[[y_sd_col]],
ymax = .data[[y_mean_col]] + .data[[y_sd_col]]
),
color = config$error_bar_params$color, # use the provided color or default
linewidth = ifelse(is.null(config$error_bar_params$linewidth), 0.1, config$error_bar_params$linewidth)
)
}
}
# Add the center point if the option is provided
if (!is.null(config$error_bar_params$mean_point) && config$error_bar_params$mean_point) {
if (!is.null(config$error_bar_params$color)) {
plot <- plot + geom_point(
mapping = aes(x = .data[[config$x_var]], y = .data[[y_mean_col]]), # Include both x and y mappings
color = config$error_bar_params$color,
shape = 16,
inherit.aes = FALSE # Prevent overriding global aesthetics
)
} else {
plot <- plot + geom_point(
mapping = aes(x = .data[[config$x_var]], y = .data[[y_mean_col]]), # Include both x and y mappings
shape = 16,
inherit.aes = FALSE # Prevent overriding global aesthetics
)
}
}
}
# Convert ggplot to plotly for interactive version # Convert ggplot to plotly for interactive version
plotly_plot <- suppressWarnings(plotly::ggplotly(plot)) plotly_plot <- suppressWarnings(plotly::ggplotly(plot))
@@ -729,16 +650,17 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
total_spots <- grid_layout$nrow * grid_layout$ncol total_spots <- grid_layout$nrow * grid_layout$ncol
num_plots <- length(static_plots) num_plots <- length(static_plots)
# if (num_plots < total_spots) { if (num_plots < total_spots) {
# message("Filling ", total_spots - num_plots, " empty spots with nullGrob()") message("Filling ", total_spots - num_plots, " empty spots with nullGrob()")
# static_plots <- c(static_plots, replicate(total_spots - num_plots, nullGrob(), simplify = FALSE)) static_plots <- c(static_plots, replicate(total_spots - num_plots, nullGrob(), simplify = FALSE))
# } }
# Print a page of gridded plots
grid.arrange( grid.arrange(
grobs = static_plots, grobs = static_plots,
ncol = grid_layout$ncol, ncol = grid_layout$ncol,
nrow = grid_layout$nrow nrow = grid_layout$nrow)
)
} else { } else {
# Print individual plots on separate pages if no grid layout # Print individual plots on separate pages if no grid layout
for (plot in static_plots) { for (plot in static_plots) {
@@ -789,6 +711,78 @@ generate_scatter_plot <- function(plot, config) {
) )
} }
# Add error bars if specified
if (!is.null(config$error_bar) && config$error_bar) {
# Check if custom columns are provided for y_mean and y_sd, or use the defaults
y_mean_col <- if (!is.null(config$error_bar_params$y_mean_col)) {
config$error_bar_params$y_mean_col
} else {
paste0("mean_", config$y_var)
}
y_sd_col <- if (!is.null(config$error_bar_params$y_sd_col)) {
config$error_bar_params$y_sd_col
} else {
paste0("sd_", config$y_var)
}
# Use rlang to handle custom error bar calculations
if (!is.null(config$error_bar_params$custom_error_bar)) {
custom_ymin_expr <- rlang::parse_expr(config$error_bar_params$custom_error_bar$ymin)
custom_ymax_expr <- rlang::parse_expr(config$error_bar_params$custom_error_bar$ymax)
plot <- plot + geom_errorbar(
aes(
ymin = !!custom_ymin_expr,
ymax = !!custom_ymax_expr
),
color = config$error_bar_params$color,
linewidth = ifelse(is.null(config$error_bar_params$linewidth), 0.1, config$error_bar_params$linewidth)
)
} else {
# If no custom error bar formula, use the default or dynamic ones
if (!is.null(config$color_var) && config$color_var %in% colnames(config$df)) {
# Only use color_var if it's present in the dataframe
plot <- plot + geom_errorbar(
aes(
ymin = .data[[y_mean_col]] - .data[[y_sd_col]],
ymax = .data[[y_mean_col]] + .data[[y_sd_col]],
color = .data[[config$color_var]]
),
linewidth = 0.1
)
} else {
# If color_var is missing, fall back to a default color or none
plot <- plot + geom_errorbar(
aes(
ymin = .data[[y_mean_col]] - .data[[y_sd_col]],
ymax = .data[[y_mean_col]] + .data[[y_sd_col]]
),
color = config$error_bar_params$color, # use the provided color or default
linewidth = ifelse(is.null(config$error_bar_params$linewidth), 0.1, config$error_bar_params$linewidth)
)
}
}
# Add the center point if the option is provided
if (!is.null(config$error_bar_params$mean_point) && config$error_bar_params$mean_point) {
if (!is.null(config$error_bar_params$color)) {
plot <- plot + geom_point(
mapping = aes(x = .data[[config$x_var]], y = .data[[y_mean_col]]), # Include both x and y mappings
color = config$error_bar_params$color,
shape = 16,
inherit.aes = FALSE # Prevent overriding global aesthetics
)
} else {
plot <- plot + geom_point(
mapping = aes(x = .data[[config$x_var]], y = .data[[y_mean_col]]), # Include both x and y mappings
shape = 16,
inherit.aes = FALSE # Prevent overriding global aesthetics
)
}
}
}
# Add linear regression line if specified # Add linear regression line if specified
if (!is.null(config$lm_line)) { if (!is.null(config$lm_line)) {
# Extract necessary values # Extract necessary values
@@ -1570,7 +1564,7 @@ main <- function() {
) %>% ) %>%
filter(!is.na(L)) filter(!is.na(L))
message("Calculating background strain summary statistics") message("Calculating background summary statistics")
ss_bg <- calculate_summary_stats(df_bg, c("L", "K", "r", "AUC", "delta_bg"), # formerly X_stats_BY ss_bg <- calculate_summary_stats(df_bg, c("L", "K", "r", "AUC", "delta_bg"), # formerly X_stats_BY
group_vars = c("OrfRep", "Drug", "conc_num", "conc_num_factor_factor")) group_vars = c("OrfRep", "Drug", "conc_num", "conc_num_factor_factor"))
summary_stats_bg <- ss_bg$summary_stats summary_stats_bg <- ss_bg$summary_stats
@@ -1621,16 +1615,16 @@ main <- function() {
group_vars = c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor_factor") group_vars = c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor_factor")
)$df_with_stats )$df_with_stats
message("Calculating reference strain interaction scores") # message("Calculating reference strain interaction scores")
reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, "reference") # reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, "reference")
df_reference_interactions_joined <- reference_results$full_data # df_reference_interactions_joined <- reference_results$full_data
df_reference_interactions <- reference_results$interactions # df_reference_interactions <- reference_results$interactions
write.csv(reference_results$calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE) # write.csv(reference_results$calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE) # write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
message("Generating reference interaction plots") # message("Generating reference interaction plots")
reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference") # reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference")
generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16) # generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16)
message("Setting missing deletion values to the highest theoretical value at each drug conc for L") message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
df_deletion <- df_na_stats %>% # formerly X2 df_deletion <- df_na_stats %>% # formerly X2