Move y-limit filtering from generate_and_save_plots to generate_interaction_plot_configs

This commit is contained in:
2024-10-06 17:47:18 -04:00
parent 0c60980ba9
commit c6fec56630

View File

@@ -525,36 +525,17 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
config <- group$plots[[i]] config <- group$plots[[i]]
df <- config$df df <- config$df
# Filter and debug out-of-bounds data
if (!is.null(config$ylim_vals)) {
out_of_bounds <- df %>%
filter(
is.na(.data[[config$y_var]]) |
.data[[config$y_var]] < config$ylim_vals[1] |
.data[[config$y_var]] > config$ylim_vals[2]
)
if (nrow(out_of_bounds) > 0) {
message("Filtered ", nrow(out_of_bounds), " row(s) from '", config$title, "' because ", config$y_var,
" is outside of y-limits: [", config$ylim_vals[1], ", ", config$ylim_vals[2], "]:")
# print(out_of_bounds %>% select(OrfRep, Gene, num, Drug, scan, Plate, Row, Col, conc_num, all_of(config$y_var)), width = 1000)
}
df <- df %>%
filter(
!is.na(.data[[config$y_var]]) &
.data[[config$y_var]] >= config$ylim_vals[1] &
.data[[config$y_var]] <= config$ylim_vals[2]
)
}
# Filter NAs # Filter NAs
if (!is.null(config$filter_na) && config$filter_na) { if (!is.null(config$filter_na) && config$filter_na) {
df <- df %>% df <- df %>%
filter(!is.na(.data[[config$y_var]])) filter(!is.na(.data[[config$y_var]]))
} }
# TODO for now skip all NA plots NA data
# Eventually add to own or filter_na block so we can handle selectively
if (nrow(df) == 0) { if (nrow(df) == 0) {
message("No data available after filtering for plot ", config$title) message("Insufficient data for plot:", config$title)
next # Skip this plot if no data is available next # skip plot if insufficient data is available
} }
aes_mapping <- if (config$plot_type == "bar") { aes_mapping <- if (config$plot_type == "bar") {
@@ -599,10 +580,10 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
if (!is.null(config$y_label)) plot <- plot + ylab(config$y_label) if (!is.null(config$y_label)) plot <- plot + ylab(config$y_label)
if (!is.null(config$coord_cartesian)) plot <- plot + coord_cartesian(ylim = config$coord_cartesian) if (!is.null(config$coord_cartesian)) plot <- plot + coord_cartesian(ylim = config$coord_cartesian)
plotly_plot <- suppressWarnings(plotly::ggplotly(plot)) #plotly_plot <- suppressWarnings(plotly::ggplotly(plot))
static_plots[[i]] <- plot static_plots[[i]] <- plot
plotly_plots[[i]] <- plotly_plot #plotly_plots[[i]] <- plotly_plot
} }
grid_layout <- group$grid_layout grid_layout <- group$grid_layout
@@ -624,16 +605,11 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
static_plots <- c(static_plots, replicate(total_spots - num_plots, nullGrob(), simplify = FALSE)) static_plots <- c(static_plots, replicate(total_spots - num_plots, nullGrob(), simplify = FALSE))
} }
tryCatch({ grid.arrange(
grid.arrange( grobs = static_plots,
grobs = static_plots, ncol = grid_layout$ncol,
ncol = grid_layout$ncol, nrow = grid_layout$nrow
nrow = grid_layout$nrow )
)
}, error = function(e) {
message("Error in grid.arrange: ", e$message)
print(static_plots)
})
} else { } else {
for (plot in static_plots) { for (plot in static_plots) {
@@ -644,15 +620,14 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
dev.off() dev.off()
out_html_file <- file.path(out_dir, paste0(filename, ".html")) # out_html_file <- file.path(out_dir, paste0(filename, ".html"))
message("Saving combined HTML file: ", out_html_file) # message("Saving combined HTML file: ", out_html_file)
htmltools::save_html( # htmltools::save_html(
htmltools::tagList(plotly_plots), # htmltools::tagList(plotly_plots),
file = out_html_file # file = out_html_file
) # )
} }
generate_scatter_plot <- function(plot, config) { generate_scatter_plot <- function(plot, config) {
# Define the points # Define the points
@@ -790,7 +765,7 @@ generate_scatter_plot <- function(plot, config) {
) )
} }
} else { } else {
message("Skipping linear modeling line due to y-values outside of limits.") message("Skipping linear regression line due to y-values outside of limits")
} }
} else { } else {
# If no y-limits are provided, proceed with the annotation # If no y-limits are provided, proceed with the annotation
@@ -805,7 +780,7 @@ generate_scatter_plot <- function(plot, config) {
) )
} }
} else { } else {
message("Skipping linear modeling line due to missing or invalid values.") message("Skipping linear regression line due to missing or invalid values")
} }
} }
@@ -984,7 +959,6 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type)
) )
plot_config$position <- "jitter" plot_config$position <- "jitter"
# Cannot figure out how to place these properly for discrete x-axis so let's be hacky
annotations <- list( annotations <- list(
list(x = 0.25, y = y_limits[1] + 0.08 * y_span, label = " NG =", size = 4), list(x = 0.25, y = y_limits[1] + 0.08 * y_span, label = " NG =", size = 4),
list(x = 0.25, y = y_limits[1] + 0.04 * y_span, label = " DB =", size = 4), list(x = 0.25, y = y_limits[1] + 0.04 * y_span, label = " DB =", size = 4),
@@ -1045,26 +1019,50 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type)
for (var in names(delta_limits_map)) { for (var in names(delta_limits_map)) {
y_limits <- delta_limits_map[[var]] y_limits <- delta_limits_map[[var]]
y_span <- y_limits[2] - y_limits[1] y_span <- y_limits[2] - y_limits[1]
y_var_name <- paste0("Delta_", var)
WT_sd_value <- first(group_data[[paste0("WT_sd_", var)]], default = 0) # Anti-filter to select out-of-bounds rows
Z_Shift_value <- round(first(group_data[[paste0("Z_Shift_", var)]], default = 0), 2) out_of_bounds <- group_data %>%
Z_lm_value <- round(first(group_data[[paste0("Z_lm_", var)]], default = 0), 2) filter(is.na(.data[[y_var_name]]) |
R_squared_value <- round(first(group_data[[paste0("R_Squared_", var)]], default = 0), 2) .data[[y_var_name]] < y_limits[1] |
.data[[y_var_name]] > y_limits[2])
NG_value <- first(group_data$NG, default = 0) if (nrow(out_of_bounds) > 0) {
DB_value <- first(group_data$DB, default = 0) message(sprintf("Filtered %d row(s) from '%s' because %s is outside of y-limits: [%f, %f]",
SM_value <- first(group_data$SM, default = 0) nrow(out_of_bounds), OrfRepTitle, y_var_name, y_limits[1], y_limits[2]
))
}
# Do the actual filtering
group_data_filtered <- group_data %>%
filter(!is.na(.data[[y_var_name]]) &
.data[[y_var_name]] >= y_limits[1] &
.data[[y_var_name]] <= y_limits[2])
if (nrow(group_data_filtered) == 0) {
message("Insufficient data for plot: ", OrfRepTitle, " ", var)
next # skip plot if insufficient data is available
}
WT_sd_value <- first(group_data_filtered[[paste0("WT_sd_", var)]], default = 0)
Z_Shift_value <- round(first(group_data_filtered[[paste0("Z_Shift_", var)]], default = 0), 2)
Z_lm_value <- round(first(group_data_filtered[[paste0("Z_lm_", var)]], default = 0), 2)
R_squared_value <- round(first(group_data_filtered[[paste0("R_Squared_", var)]], default = 0), 2)
NG_value <- first(group_data_filtered$NG, default = 0)
DB_value <- first(group_data_filtered$DB, default = 0)
SM_value <- first(group_data_filtered$SM, default = 0)
lm_intercept_col <- paste0("lm_intercept_", var) lm_intercept_col <- paste0("lm_intercept_", var)
lm_slope_col <- paste0("lm_slope_", var) lm_slope_col <- paste0("lm_slope_", var)
lm_intercept_value <- first(group_data[[lm_intercept_col]], default = 0) lm_intercept_value <- first(group_data_filtered[[lm_intercept_col]], default = 0)
lm_slope_value <- first(group_data[[lm_slope_col]], default = 0) lm_slope_value <- first(group_data_filtered[[lm_slope_col]], default = 0)
plot_config <- list( plot_config <- list(
df = group_data, df = group_data_filtered,
plot_type = "scatter", plot_type = "scatter",
x_var = "conc_num_factor_factor", x_var = "conc_num_factor_factor",
y_var = paste0("Delta_", var), y_var = y_var_name,
x_label = paste0("[", unique(df_summary$Drug)[1], "]"), x_label = paste0("[", unique(df_summary$Drug)[1], "]"),
shape = 16, shape = 16,
title = paste(OrfRepTitle, Gene, sep = " "), title = paste(OrfRepTitle, Gene, sep = " "),
@@ -1087,17 +1085,16 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type)
color = "gray70", color = "gray70",
linewidth = 0.5 linewidth = 0.5
), ),
x_breaks = unique(group_data$conc_num_factor_factor), x_breaks = unique(group_data_filtered$conc_num_factor_factor),
x_labels = as.character(unique(group_data$conc_num)), x_labels = as.character(unique(group_data_filtered$conc_num)),
ylim_vals = y_limits, ylim_vals = y_limits,
# filter_na = TRUE,
lm_line = list( lm_line = list(
intercept = lm_intercept_value, intercept = lm_intercept_value,
slope = lm_slope_value, slope = lm_slope_value,
color = "blue", color = "blue",
linewidth = 0.8, linewidth = 0.8,
x_min = min(as.numeric(group_data$conc_num_factor_factor)), x_min = min(as.numeric(group_data_filtered$conc_num_factor_factor)),
x_max = max(as.numeric(group_data$conc_num_factor_factor)) x_max = max(as.numeric(group_data_filtered$conc_num_factor_factor))
) )
) )
delta_plot_configs <- append(delta_plot_configs, list(plot_config)) delta_plot_configs <- append(delta_plot_configs, list(plot_config))
@@ -1587,16 +1584,16 @@ main <- function() {
group_vars = c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor_factor") group_vars = c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor_factor")
)$df_with_stats )$df_with_stats
# message("Calculating reference strain interaction scores") message("Calculating reference strain interaction scores")
# reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, "reference") reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, "reference")
# df_reference_interactions_joined <- reference_results$full_data df_reference_interactions_joined <- reference_results$full_data
# df_reference_interactions <- reference_results$interactions df_reference_interactions <- reference_results$interactions
# write.csv(reference_results$calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE) write.csv(reference_results$calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
# write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE) write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
# message("Generating reference interaction plots") message("Generating reference interaction plots")
# reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference") reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference")
# generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16) generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16)
message("Setting missing deletion values to the highest theoretical value at each drug conc for L") message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
df_deletion <- df_na_stats %>% # formerly X2 df_deletion <- df_na_stats %>% # formerly X2