Move error bars to generate_scattter_plots()

This commit is contained in:
2024-10-06 14:58:54 -04:00
parent bee9aea866
commit faa82e0af4

View File

@@ -300,52 +300,45 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
ungroup() %>% # Ungroup before group_modify ungroup() %>% # Ungroup before group_modify
group_by(across(all_of(group_vars))) %>% group_by(across(all_of(group_vars))) %>%
group_modify(~ { group_modify(~ {
# Check if there are enough unique conc_num_factor levels to perform lm
if (length(unique(.x$conc_num_factor)) > 1) { # Filter each column for valid data or else linear modeling will fail
valid_data_L <- .x %>% filter(!is.na(Delta_L))
valid_data_K <- .x %>% filter(!is.na(Delta_K))
valid_data_r <- .x %>% filter(!is.na(Delta_r))
valid_data_AUC <- .x %>% filter(!is.na(Delta_AUC))
# Perform linear modeling # Perform linear modeling
lm_L <- lm(Delta_L ~ conc_num_factor, data = .x) lm_L <- if (nrow(valid_data_L) > 1) lm(Delta_L ~ conc_num_factor, data = valid_data_L) else NULL
lm_K <- lm(Delta_K ~ conc_num_factor, data = .x) lm_K <- if (nrow(valid_data_K) > 1) lm(Delta_K ~ conc_num_factor, data = valid_data_K) else NULL
lm_r <- lm(Delta_r ~ conc_num_factor, data = .x) lm_r <- if (nrow(valid_data_r) > 1) lm(Delta_r ~ conc_num_factor, data = valid_data_r) else NULL
lm_AUC <- lm(Delta_AUC ~ conc_num_factor, data = .x) lm_AUC <- if (nrow(valid_data_AUC) > 1) lm(Delta_AUC ~ conc_num_factor, data = valid_data_AUC) else NULL
# If the model fails, set model-related values to NA # Extract coefficients for calculations and plotting
.x %>% .x %>%
mutate( mutate(
lm_intercept_L = ifelse(!is.null(lm_L), coef(lm_L)[1], NA), lm_intercept_L = if (!is.null(lm_L)) coef(lm_L)[1] else NA,
lm_slope_L = ifelse(!is.null(lm_L), coef(lm_L)[2], NA), lm_slope_L = if (!is.null(lm_L)) coef(lm_L)[2] else NA,
R_Squared_L = ifelse(!is.null(lm_L), summary(lm_L)$r.squared, NA), R_Squared_L = if (!is.null(lm_L)) summary(lm_L)$r.squared else NA,
lm_Score_L = ifelse(!is.null(lm_L), max_conc * coef(lm_L)[2] + coef(lm_L)[1], NA), lm_Score_L = if (!is.null(lm_L)) max_conc * coef(lm_L)[2] + coef(lm_L)[1] else NA,
lm_intercept_K = ifelse(!is.null(lm_K), coef(lm_K)[1], NA), lm_intercept_K = if (!is.null(lm_K)) coef(lm_K)[1] else NA,
lm_slope_K = ifelse(!is.null(lm_K), coef(lm_K)[2], NA), lm_slope_K = if (!is.null(lm_K)) coef(lm_K)[2] else NA,
R_Squared_K = ifelse(!is.null(lm_K), summary(lm_K)$r.squared, NA), R_Squared_K = if (!is.null(lm_K)) summary(lm_K)$r.squared else NA,
lm_Score_K = ifelse(!is.null(lm_K), max_conc * coef(lm_K)[2] + coef(lm_K)[1], NA), lm_Score_K = if (!is.null(lm_K)) max_conc * coef(lm_K)[2] + coef(lm_K)[1] else NA,
lm_intercept_r = ifelse(!is.null(lm_r), coef(lm_r)[1], NA), lm_intercept_r = if (!is.null(lm_r)) coef(lm_r)[1] else NA,
lm_slope_r = ifelse(!is.null(lm_r), coef(lm_r)[2], NA), lm_slope_r = if (!is.null(lm_r)) coef(lm_r)[2] else NA,
R_Squared_r = ifelse(!is.null(lm_r), summary(lm_r)$r.squared, NA), R_Squared_r = if (!is.null(lm_r)) summary(lm_r)$r.squared else NA,
lm_Score_r = ifelse(!is.null(lm_r), max_conc * coef(lm_r)[2] + coef(lm_r)[1], NA), lm_Score_r = if (!is.null(lm_r)) max_conc * coef(lm_r)[2] + coef(lm_r)[1] else NA,
lm_intercept_AUC = ifelse(!is.null(lm_AUC), coef(lm_AUC)[1], NA), lm_intercept_AUC = if (!is.null(lm_AUC)) coef(lm_AUC)[1] else NA,
lm_slope_AUC = ifelse(!is.null(lm_AUC), coef(lm_AUC)[2], NA), lm_slope_AUC = if (!is.null(lm_AUC)) coef(lm_AUC)[2] else NA,
R_Squared_AUC = ifelse(!is.null(lm_AUC), summary(lm_AUC)$r.squared, NA), R_Squared_AUC = if (!is.null(lm_AUC)) summary(lm_AUC)$r.squared else NA,
lm_Score_AUC = ifelse(!is.null(lm_AUC), max_conc * coef(lm_AUC)[2] + coef(lm_AUC)[1], NA) lm_Score_AUC = if (!is.null(lm_AUC)) max_conc * coef(lm_AUC)[2] + coef(lm_AUC)[1] else NA
) )
} else {
# If not enough conc_num_factor levels, set lm-related values to NA
.x %>%
mutate(
lm_intercept_L = NA, lm_slope_L = NA, R_Squared_L = NA, lm_Score_L = NA,
lm_intercept_K = NA, lm_slope_K = NA, R_Squared_K = NA, lm_Score_K = NA,
lm_intercept_r = NA, lm_slope_r = NA, R_Squared_r = NA, lm_Score_r = NA,
lm_intercept_AUC = NA, lm_slope_AUC = NA, R_Squared_AUC = NA, lm_Score_AUC = NA
)
}
}) %>% }) %>%
ungroup() ungroup()
# For interaction plot error bars # For interaction plot error bars
delta_means_sds <- calculations %>% delta_means_sds <- calculations %>%
group_by(across(all_of(group_vars))) %>% group_by(across(all_of(group_vars))) %>%
@@ -631,6 +624,93 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
} }
} }
# Convert ggplot to plotly for interactive version
plotly_plot <- suppressWarnings(plotly::ggplotly(plot))
# Store both static and interactive versions
static_plots[[i]] <- plot
plotly_plots[[i]] <- plotly_plot
}
# Print the plots in the current group to the PDF
if (!is.null(grid_layout)) {
# Set grid_ncol to 1 if not specified
if (is.null(grid_layout$ncol)) {
grid_layout$ncol <- 1
}
# If ncol is set but nrow is not, calculate nrow dynamically based on num_plots
if (!is.null(grid_layout$ncol) && is.null(grid_layout$nrow)) {
num_plots <- length(static_plots)
nrow <- ceiling(num_plots / grid_layout$ncol)
# message("No nrow provided, automatically using nrow = ", nrow)
grid_layout$nrow <- nrow
}
total_spots <- grid_layout$nrow * grid_layout$ncol
num_plots <- length(static_plots)
if (num_plots < total_spots) {
message("Filling ", total_spots - num_plots, " empty spots with nullGrob()")
static_plots <- c(static_plots, replicate(total_spots - num_plots, nullGrob(), simplify = FALSE))
}
# Print a page of gridded plots
grid.arrange(
grobs = static_plots,
ncol = grid_layout$ncol,
nrow = grid_layout$nrow)
} else {
# Print individual plots on separate pages if no grid layout
for (plot in static_plots) {
print(plot)
}
}
}
# Close the PDF device after all plots are done
dev.off()
# Save HTML file with interactive plots if needed
out_html_file <- file.path(out_dir, paste0(filename, ".html"))
message("Saving combined HTML file: ", out_html_file)
htmltools::save_html(
htmltools::tagList(plotly_plots),
file = out_html_file
)
}
generate_scatter_plot <- function(plot, config) {
# Define the points
shape <- if (!is.null(config$shape)) config$shape else 3
size <- if (!is.null(config$size)) config$size else 1.5
position <-
if (!is.null(config$position) && config$position == "jitter") {
position_jitter(width = 0.4, height = 0.1)
} else {
"identity"
}
plot <- plot + geom_point(
shape = shape,
size = size,
position = position
)
# Add a cyan point for the reference data for correlation plots
if (!is.null(config$cyan_points) && config$cyan_points) {
plot <- plot + geom_point(
data = config$df_reference,
mapping = aes(x = .data[[config$x_var]], y = .data[[config$y_var]]),
color = "cyan",
shape = 3,
size = 0.5,
inherit.aes = FALSE
)
}
# Add error bars if specified # Add error bars if specified
if (!is.null(config$error_bar) && config$error_bar) { if (!is.null(config$error_bar) && config$error_bar) {
# Check if custom columns are provided for y_mean and y_sd, or use the defaults # Check if custom columns are provided for y_mean and y_sd, or use the defaults
@@ -703,92 +783,6 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
} }
} }
# Convert ggplot to plotly for interactive version
plotly_plot <- suppressWarnings(plotly::ggplotly(plot))
# Store both static and interactive versions
static_plots[[i]] <- plot
plotly_plots[[i]] <- plotly_plot
}
# Print the plots in the current group to the PDF
if (!is.null(grid_layout)) {
# Set grid_ncol to 1 if not specified
if (is.null(grid_layout$ncol)) {
grid_layout$ncol <- 1
}
# If ncol is set but nrow is not, calculate nrow dynamically based on num_plots
if (!is.null(grid_layout$ncol) && is.null(grid_layout$nrow)) {
num_plots <- length(static_plots)
nrow <- ceiling(num_plots / grid_layout$ncol)
# message("No nrow provided, automatically using nrow = ", nrow)
grid_layout$nrow <- nrow
}
total_spots <- grid_layout$nrow * grid_layout$ncol
num_plots <- length(static_plots)
# if (num_plots < total_spots) {
# message("Filling ", total_spots - num_plots, " empty spots with nullGrob()")
# static_plots <- c(static_plots, replicate(total_spots - num_plots, nullGrob(), simplify = FALSE))
# }
grid.arrange(
grobs = static_plots,
ncol = grid_layout$ncol,
nrow = grid_layout$nrow
)
} else {
# Print individual plots on separate pages if no grid layout
for (plot in static_plots) {
print(plot)
}
}
}
# Close the PDF device after all plots are done
dev.off()
# Save HTML file with interactive plots if needed
out_html_file <- file.path(out_dir, paste0(filename, ".html"))
message("Saving combined HTML file: ", out_html_file)
htmltools::save_html(
htmltools::tagList(plotly_plots),
file = out_html_file
)
}
generate_scatter_plot <- function(plot, config) {
# Define the points
shape <- if (!is.null(config$shape)) config$shape else 3
size <- if (!is.null(config$size)) config$size else 1.5
position <-
if (!is.null(config$position) && config$position == "jitter") {
position_jitter(width = 0.4, height = 0.1)
} else {
"identity"
}
plot <- plot + geom_point(
shape = shape,
size = size,
position = position
)
# Add a cyan point for the reference data for correlation plots
if (!is.null(config$cyan_points) && config$cyan_points) {
plot <- plot + geom_point(
data = config$df_reference,
mapping = aes(x = .data[[config$x_var]], y = .data[[config$y_var]]),
color = "cyan",
shape = 3,
size = 0.5,
inherit.aes = FALSE
)
}
# Add linear regression line if specified # Add linear regression line if specified
if (!is.null(config$lm_line)) { if (!is.null(config$lm_line)) {
# Extract necessary values # Extract necessary values
@@ -1570,7 +1564,7 @@ main <- function() {
) %>% ) %>%
filter(!is.na(L)) filter(!is.na(L))
message("Calculating background strain summary statistics") message("Calculating background summary statistics")
ss_bg <- calculate_summary_stats(df_bg, c("L", "K", "r", "AUC", "delta_bg"), # formerly X_stats_BY ss_bg <- calculate_summary_stats(df_bg, c("L", "K", "r", "AUC", "delta_bg"), # formerly X_stats_BY
group_vars = c("OrfRep", "Drug", "conc_num", "conc_num_factor_factor")) group_vars = c("OrfRep", "Drug", "conc_num", "conc_num_factor_factor"))
summary_stats_bg <- ss_bg$summary_stats summary_stats_bg <- ss_bg$summary_stats
@@ -1621,16 +1615,16 @@ main <- function() {
group_vars = c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor_factor") group_vars = c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor_factor")
)$df_with_stats )$df_with_stats
message("Calculating reference strain interaction scores") # message("Calculating reference strain interaction scores")
reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, "reference") # reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, "reference")
df_reference_interactions_joined <- reference_results$full_data # df_reference_interactions_joined <- reference_results$full_data
df_reference_interactions <- reference_results$interactions # df_reference_interactions <- reference_results$interactions
write.csv(reference_results$calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE) # write.csv(reference_results$calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE) # write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
message("Generating reference interaction plots") # message("Generating reference interaction plots")
reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference") # reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference")
generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16) # generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16)
message("Setting missing deletion values to the highest theoretical value at each drug conc for L") message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
df_deletion <- df_na_stats %>% # formerly X2 df_deletion <- df_na_stats %>% # formerly X2