Move y-limit filtering from generate_and_save_plots to generate_interaction_plot_configs

This commit is contained in:
2024-10-06 17:47:18 -04:00
parent 0c60980ba9
commit c6fec56630

View File

@@ -525,36 +525,17 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
config <- group$plots[[i]]
df <- config$df
# Filter and debug out-of-bounds data
if (!is.null(config$ylim_vals)) {
out_of_bounds <- df %>%
filter(
is.na(.data[[config$y_var]]) |
.data[[config$y_var]] < config$ylim_vals[1] |
.data[[config$y_var]] > config$ylim_vals[2]
)
if (nrow(out_of_bounds) > 0) {
message("Filtered ", nrow(out_of_bounds), " row(s) from '", config$title, "' because ", config$y_var,
" is outside of y-limits: [", config$ylim_vals[1], ", ", config$ylim_vals[2], "]:")
# print(out_of_bounds %>% select(OrfRep, Gene, num, Drug, scan, Plate, Row, Col, conc_num, all_of(config$y_var)), width = 1000)
}
df <- df %>%
filter(
!is.na(.data[[config$y_var]]) &
.data[[config$y_var]] >= config$ylim_vals[1] &
.data[[config$y_var]] <= config$ylim_vals[2]
)
}
# Filter NAs
if (!is.null(config$filter_na) && config$filter_na) {
df <- df %>%
filter(!is.na(.data[[config$y_var]]))
}
# TODO for now skip all NA plots NA data
# Eventually add to own or filter_na block so we can handle selectively
if (nrow(df) == 0) {
message("No data available after filtering for plot ", config$title)
next # Skip this plot if no data is available
message("Insufficient data for plot:", config$title)
next # skip plot if insufficient data is available
}
aes_mapping <- if (config$plot_type == "bar") {
@@ -599,10 +580,10 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
if (!is.null(config$y_label)) plot <- plot + ylab(config$y_label)
if (!is.null(config$coord_cartesian)) plot <- plot + coord_cartesian(ylim = config$coord_cartesian)
plotly_plot <- suppressWarnings(plotly::ggplotly(plot))
#plotly_plot <- suppressWarnings(plotly::ggplotly(plot))
static_plots[[i]] <- plot
plotly_plots[[i]] <- plotly_plot
#plotly_plots[[i]] <- plotly_plot
}
grid_layout <- group$grid_layout
@@ -624,16 +605,11 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
static_plots <- c(static_plots, replicate(total_spots - num_plots, nullGrob(), simplify = FALSE))
}
tryCatch({
grid.arrange(
grobs = static_plots,
ncol = grid_layout$ncol,
nrow = grid_layout$nrow
)
}, error = function(e) {
message("Error in grid.arrange: ", e$message)
print(static_plots)
})
grid.arrange(
grobs = static_plots,
ncol = grid_layout$ncol,
nrow = grid_layout$nrow
)
} else {
for (plot in static_plots) {
@@ -644,15 +620,14 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
dev.off()
out_html_file <- file.path(out_dir, paste0(filename, ".html"))
message("Saving combined HTML file: ", out_html_file)
htmltools::save_html(
htmltools::tagList(plotly_plots),
file = out_html_file
)
# out_html_file <- file.path(out_dir, paste0(filename, ".html"))
# message("Saving combined HTML file: ", out_html_file)
# htmltools::save_html(
# htmltools::tagList(plotly_plots),
# file = out_html_file
# )
}
generate_scatter_plot <- function(plot, config) {
# Define the points
@@ -790,7 +765,7 @@ generate_scatter_plot <- function(plot, config) {
)
}
} else {
message("Skipping linear modeling line due to y-values outside of limits.")
message("Skipping linear regression line due to y-values outside of limits")
}
} else {
# If no y-limits are provided, proceed with the annotation
@@ -805,7 +780,7 @@ generate_scatter_plot <- function(plot, config) {
)
}
} else {
message("Skipping linear modeling line due to missing or invalid values.")
message("Skipping linear regression line due to missing or invalid values")
}
}
@@ -984,7 +959,6 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type)
)
plot_config$position <- "jitter"
# Cannot figure out how to place these properly for discrete x-axis so let's be hacky
annotations <- list(
list(x = 0.25, y = y_limits[1] + 0.08 * y_span, label = " NG =", size = 4),
list(x = 0.25, y = y_limits[1] + 0.04 * y_span, label = " DB =", size = 4),
@@ -1045,26 +1019,50 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type)
for (var in names(delta_limits_map)) {
y_limits <- delta_limits_map[[var]]
y_span <- y_limits[2] - y_limits[1]
y_var_name <- paste0("Delta_", var)
WT_sd_value <- first(group_data[[paste0("WT_sd_", var)]], default = 0)
Z_Shift_value <- round(first(group_data[[paste0("Z_Shift_", var)]], default = 0), 2)
Z_lm_value <- round(first(group_data[[paste0("Z_lm_", var)]], default = 0), 2)
R_squared_value <- round(first(group_data[[paste0("R_Squared_", var)]], default = 0), 2)
# Anti-filter to select out-of-bounds rows
out_of_bounds <- group_data %>%
filter(is.na(.data[[y_var_name]]) |
.data[[y_var_name]] < y_limits[1] |
.data[[y_var_name]] > y_limits[2])
NG_value <- first(group_data$NG, default = 0)
DB_value <- first(group_data$DB, default = 0)
SM_value <- first(group_data$SM, default = 0)
if (nrow(out_of_bounds) > 0) {
message(sprintf("Filtered %d row(s) from '%s' because %s is outside of y-limits: [%f, %f]",
nrow(out_of_bounds), OrfRepTitle, y_var_name, y_limits[1], y_limits[2]
))
}
# Do the actual filtering
group_data_filtered <- group_data %>%
filter(!is.na(.data[[y_var_name]]) &
.data[[y_var_name]] >= y_limits[1] &
.data[[y_var_name]] <= y_limits[2])
if (nrow(group_data_filtered) == 0) {
message("Insufficient data for plot: ", OrfRepTitle, " ", var)
next # skip plot if insufficient data is available
}
WT_sd_value <- first(group_data_filtered[[paste0("WT_sd_", var)]], default = 0)
Z_Shift_value <- round(first(group_data_filtered[[paste0("Z_Shift_", var)]], default = 0), 2)
Z_lm_value <- round(first(group_data_filtered[[paste0("Z_lm_", var)]], default = 0), 2)
R_squared_value <- round(first(group_data_filtered[[paste0("R_Squared_", var)]], default = 0), 2)
NG_value <- first(group_data_filtered$NG, default = 0)
DB_value <- first(group_data_filtered$DB, default = 0)
SM_value <- first(group_data_filtered$SM, default = 0)
lm_intercept_col <- paste0("lm_intercept_", var)
lm_slope_col <- paste0("lm_slope_", var)
lm_intercept_value <- first(group_data[[lm_intercept_col]], default = 0)
lm_slope_value <- first(group_data[[lm_slope_col]], default = 0)
lm_intercept_value <- first(group_data_filtered[[lm_intercept_col]], default = 0)
lm_slope_value <- first(group_data_filtered[[lm_slope_col]], default = 0)
plot_config <- list(
df = group_data,
df = group_data_filtered,
plot_type = "scatter",
x_var = "conc_num_factor_factor",
y_var = paste0("Delta_", var),
y_var = y_var_name,
x_label = paste0("[", unique(df_summary$Drug)[1], "]"),
shape = 16,
title = paste(OrfRepTitle, Gene, sep = " "),
@@ -1087,17 +1085,16 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type)
color = "gray70",
linewidth = 0.5
),
x_breaks = unique(group_data$conc_num_factor_factor),
x_labels = as.character(unique(group_data$conc_num)),
x_breaks = unique(group_data_filtered$conc_num_factor_factor),
x_labels = as.character(unique(group_data_filtered$conc_num)),
ylim_vals = y_limits,
# filter_na = TRUE,
lm_line = list(
intercept = lm_intercept_value,
slope = lm_slope_value,
color = "blue",
linewidth = 0.8,
x_min = min(as.numeric(group_data$conc_num_factor_factor)),
x_max = max(as.numeric(group_data$conc_num_factor_factor))
x_min = min(as.numeric(group_data_filtered$conc_num_factor_factor)),
x_max = max(as.numeric(group_data_filtered$conc_num_factor_factor))
)
)
delta_plot_configs <- append(delta_plot_configs, list(plot_config))
@@ -1587,16 +1584,16 @@ main <- function() {
group_vars = c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor_factor")
)$df_with_stats
# message("Calculating reference strain interaction scores")
# reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, "reference")
# df_reference_interactions_joined <- reference_results$full_data
# df_reference_interactions <- reference_results$interactions
# write.csv(reference_results$calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
# write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
message("Calculating reference strain interaction scores")
reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, "reference")
df_reference_interactions_joined <- reference_results$full_data
df_reference_interactions <- reference_results$interactions
write.csv(reference_results$calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
# message("Generating reference interaction plots")
# reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference")
# generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16)
message("Generating reference interaction plots")
reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference")
generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16)
message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
df_deletion <- df_na_stats %>% # formerly X2