Begin debugging

This commit is contained in:
2024-09-10 19:42:32 -04:00
parent 2a7f460897
commit 97de737ed5

View File

@@ -337,18 +337,14 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_layout = NULL) { generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_layout = NULL) {
`%||%` <- function(a, b) if (!is.null(a)) a else b `%||%` <- function(a, b) if (!is.null(a)) a else b
# Generalized plot generation
plots <- lapply(plot_configs, function(config) { plots <- lapply(plot_configs, function(config) {
df <- config$df df <- config$df
plot <- ggplot(df, aes(x = !!sym(config$x_var), y = !!sym(config$y_var), color = as.factor(!!sym(config$color_var)))) plot <- ggplot(df, aes(x = !!sym(config$x_var), y = !!sym(config$y_var), color = as.factor(!!sym(config$color_var))))
# Rank plots with SD annotations # Handle plot types like "rank", "correlation", and default scatter/box/density
if (config$plot_type == "rank") { if (config$plot_type == "rank") {
plot <- plot + geom_point(size = 0.1, shape = 3) plot <- plot + geom_point(size = 0.1, shape = 3)
# Adding SD bands
if (!is.null(config$sd_band)) { if (!is.null(config$sd_band)) {
for (i in seq_len(config$sd_band)) { for (i in seq_len(config$sd_band)) {
plot <- plot + plot <- plot +
@@ -357,111 +353,61 @@ generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_la
geom_hline(yintercept = c(-i, i), color = "gray") geom_hline(yintercept = c(-i, i), color = "gray")
} }
} }
# Optionally add enhancer/suppressor count annotations
if (!is.null(config$enhancer_label)) { if (!is.null(config$enhancer_label)) {
plot <- plot + annotate("text", x = config$enhancer_label$x, plot <- plot + annotate("text", x = config$enhancer_label$x, y = config$enhancer_label$y, label = config$enhancer_label$label) +
y = config$enhancer_label$y, label = config$enhancer_label$label) + annotate("text", x = config$suppressor_label$x, y = config$suppressor_label$y, label = config$suppressor_label$label)
annotate("text", x = config$suppressor_label$x,
y = config$suppressor_label$y, label = config$suppressor_label$label)
} }
} } else if (config$plot_type == "correlation") {
plot <- plot + geom_point(shape = 3, color = "gray70") + geom_smooth(method = "lm", color = "tomato3") +
# Correlation plots
if (config$plot_type == "correlation") {
plot <- plot + geom_point(shape = 3, color = "gray70") +
geom_smooth(method = "lm", color = "tomato3") +
annotate("text", x = 0, y = 0, label = config$correlation_text) annotate("text", x = 0, y = 0, label = config$correlation_text)
} else {
plot <- plot + aes(y = !!sym(config$y_var)) +
if (config$plot_type == "box") geom_boxplot() else
if (config$plot_type == "density") geom_density() else
if (config$plot_type == "bar") geom_bar(stat = "identity") else geom_point(shape = 3) + geom_smooth(method = "lm", se = FALSE)
} }
# General scatter/boxplot/density handling # Add error bars for "delta_bg" or general cases
if (!is.null(config$y_var)) { if (config$error_bar %||% FALSE) {
plot <- plot + aes(y = !!sym(config$y_var))
y_mean_col <- paste0("mean_", config$y_var) y_mean_col <- paste0("mean_", config$y_var)
y_sd_col <- paste0("sd_", config$y_var) y_sd_col <- paste0("sd_", config$y_var)
plot <- plot + geom_errorbar(aes(ymin = !!sym(y_mean_col) - !!sym(y_sd_col),
if (config$y_var == "delta_bg" && config$plot_type == "scatter") { ymax = !!sym(y_mean_col) + !!sym(y_sd_col)), width = 0.1) +
plot <- plot + geom_point(shape = 3, size = 0.2, position = "jitter") + geom_point(aes(y = !!sym(y_mean_col)), size = 0.6)
geom_errorbar(aes(ymin = !!sym(y_mean_col) - !!sym(y_sd_col),
ymax = !!sym(y_mean_col) + !!sym(y_sd_col)), width = 0.1) +
geom_point(aes(y = !!sym(y_mean_col)), size = 0.6)
} else if (config$error_bar %||% FALSE) {
plot <- plot +
geom_point(shape = 3, size = 0.2) +
geom_errorbar(aes(ymin = !!sym(y_mean_col) - !!sym(y_sd_col),
ymax = !!sym(y_mean_col) + !!sym(y_sd_col)), width = 0.1) +
geom_point(aes(y = !!sym(y_mean_col)), size = 0.6)
}
} }
# Plot type selection # Apply y-limits if provided
plot <- switch(config$plot_type,
"box" = plot + geom_boxplot(),
"density" = plot + geom_density(),
"bar" = plot + geom_bar(stat = "identity"),
plot + geom_point() + geom_smooth(method = "lm", se = FALSE))
# Adding y-limits if provided
if (!is.null(config$ylim_vals)) { if (!is.null(config$ylim_vals)) {
plot <- plot + coord_cartesian(ylim = config$ylim_vals) plot <- plot + coord_cartesian(ylim = config$ylim_vals)
} }
# Setting legend position, titles, and labels # Apply labels, titles, and legends
legend_position <- config$legend_position %||% "bottom" plot <- plot + ggtitle(config$title) + theme_publication(legend_position = config$legend_position %||% "bottom") +
plot <- plot + ggtitle(config$title) + theme_publication(legend_position = legend_position) if (!is.null(config$x_label)) xlab(config$x_label) else NULL +
if (!is.null(config$y_label)) ylab(config$y_label) else NULL
if (!is.null(config$x_label)) plot <- plot + xlab(config$x_label)
if (!is.null(config$y_label)) plot <- plot + ylab(config$y_label) # Add annotations if available
# Adding text annotations if provided
if (!is.null(config$annotations)) { if (!is.null(config$annotations)) {
for (annotation in config$annotations) { plot <- plot + geom_text(aes(x = config$annotations$x, y = config$annotations$y, label = config$annotations$label))
plot <- plot + annotate("text", x = annotation$x, y = annotation$y, label = annotation$label)
}
} }
return(plot) return(plot)
}) })
# If grid_layout is provided, arrange plots in a grid and save in a single PDF # Save the plots
if (!is.null(grid_layout)) { pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9)
pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9) lapply(plots, print)
dev.off()
# Loop through plots in chunks defined by ncol and nrow
for (start_idx in seq(1, length(plots), by = grid_layout$ncol * grid_layout$nrow)) { plotly_plots <- lapply(plots, function(plot) suppressWarnings(ggplotly(plot) %>% layout(legend = list(orientation = "h"))))
end_idx <- min(start_idx + grid_layout$ncol * grid_layout$nrow - 1, length(plots))
plot_subset <- plots[start_idx:end_idx] # Handle grid layout
combined_plot <- subplot(plotly_plots, nrows = grid_layout$nrow %||% length(plots), margin = 0.05)
# Arrange plots in a grid saveWidget(combined_plot, file = file.path(output_dir, paste0(file_name, ".html")), selfcontained = TRUE)
do.call(grid.arrange, c(plot_subset, ncol = grid_layout$ncol, nrow = grid_layout$nrow))
}
dev.off()
plotly_plots <- lapply(plots, function(plot) {
suppressWarnings(ggplotly(plot) %>% layout(legend = list(orientation = "h")))
})
combined_plot <- subplot(plotly_plots, nrows = grid_layout$nrow, margin = 0.05)
saveWidget(combined_plot, file = file.path(output_dir, paste0(file_name, ".html")), selfcontained = TRUE)
} else {
# Save individual plots to PDF
pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9)
lapply(plots, print)
dev.off()
# Convert plots to plotly and save as HTML
plotly_plots <- lapply(plots, function(plot) {
suppressWarnings(ggplotly(plot) %>% layout(legend = list(orientation = "h")))
})
combined_plot <- subplot(plotly_plots, nrows = length(plotly_plots), margin = 0.05)
saveWidget(combined_plot, file = file.path(output_dir, paste0(file_name, ".html")), selfcontained = TRUE)
}
} }
generate_interaction_plot_configs <- function(df, variables) { generate_interaction_plot_configs <- function(df, variables) {
plot_configs <- list() configs <- list()
for (variable in variables) { for (variable in variables) {
# Define the y-limits based on the variable being plotted # Define the y-limits based on the variable being plotted
@@ -493,7 +439,7 @@ generate_interaction_plot_configs <- function(df, variables) {
) )
# Add scatter plot configuration for this variable # Add scatter plot configuration for this variable
plot_configs[[length(plot_configs) + 1]] <- list( configs[[length(configs) + 1]] <- list(
df = df, df = df,
x_var = "conc_num_factor", x_var = "conc_num_factor",
y_var = delta_var, y_var = delta_var,
@@ -511,7 +457,7 @@ generate_interaction_plot_configs <- function(df, variables) {
) )
# Add box plot configuration for this variable # Add box plot configuration for this variable
plot_configs[[length(plot_configs) + 1]] <- list( configs[[length(configs) + 1]] <- list(
df = df, df = df,
x_var = "conc_num_factor", x_var = "conc_num_factor",
y_var = variable, y_var = variable,
@@ -526,7 +472,7 @@ generate_interaction_plot_configs <- function(df, variables) {
) )
} }
return(plot_configs) return(configs)
} }
# Adjust missing values and calculate ranks # Adjust missing values and calculate ranks
@@ -568,6 +514,8 @@ generate_rank_plot_configs <- function(df, rank_var, zscore_var, var, is_lm = FA
label = paste("Deletion Suppressors =", nrow(df[df[[zscore_var]] <= -sd_band, ])) label = paste("Deletion Suppressors =", nrow(df[df[[zscore_var]] <= -sd_band, ]))
) )
) )
return(configs)
} }
# Non-annotated version (_notext) # Non-annotated version (_notext)
@@ -588,14 +536,14 @@ generate_rank_plot_configs <- function(df, rank_var, zscore_var, var, is_lm = FA
} }
generate_correlation_plot_configs <- function(df, variables) { generate_correlation_plot_configs <- function(df, variables) {
plot_configs <- list() configs <- list()
for (variable in variables) { for (variable in variables) {
z_lm_var <- paste0("Z_lm_", variable) z_lm_var <- paste0("Z_lm_", variable)
avg_zscore_var <- paste0("Avg_Zscore_", variable) avg_zscore_var <- paste0("Avg_Zscore_", variable)
lm_r_squared_col <- paste0("lm_R_squared_", variable) lm_r_squared_col <- paste0("lm_R_squared_", variable)
plot_configs[[length(plot_configs) + 1]] <- list( configs[[length(configs) + 1]] <- list(
df = df, df = df,
x_var = avg_zscore_var, x_var = avg_zscore_var,
y_var = z_lm_var, y_var = z_lm_var,
@@ -607,7 +555,7 @@ generate_correlation_plot_configs <- function(df, variables) {
) )
} }
return(plot_configs) return(configs)
} }
main <- function() { main <- function() {
@@ -647,21 +595,22 @@ main <- function() {
df_na_stats <- ss$df_with_stats df_na_stats <- ss$df_with_stats
write.csv(summary_stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE) write.csv(summary_stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE)
print("Summary stats:") # print("Summary stats:")
print(head(summary_stats), width = 200) # print(head(summary_stats), width = 200)
# Remove rows with 0 values in L # Remove rows with 0 values in L
df_no_zeros <- df_na %>% filter(L > 0) df_no_zeros <- df_na %>% filter(L > 0)
# Additional filtering for non-finite values # Additional filtering for non-finite values
# Filter and print non-finite rows, then filter and keep only finite rows
df_na_filtered <- df_na %>% df_na_filtered <- df_na %>%
filter(if_any(c(L), ~ !is.finite(.))) %>%
{ {
if (nrow(.) > 0) { non_finite_rows <- filter(., if_any(c(L), ~ !is.finite(.)))
if (nrow(non_finite_rows) > 0) {
message("Removing non-finite rows:\n") message("Removing non-finite rows:\n")
print(head(., n = 10)) print(head(non_finite_rows, n = 10))
} }
df_na %>% filter(if_all(c(L), is.finite)) filter(., if_all(c(L), is.finite))
} }
# Filter data within and outside 2SD # Filter data within and outside 2SD
@@ -871,11 +820,11 @@ main <- function() {
# Calculate interactions # Calculate interactions
variables <- c("L", "K", "r", "AUC") variables <- c("L", "K", "r", "AUC")
message("Calculating interaction scores") message("Calculating interaction scores")
print("Reference strain:") # print("Reference strain:")
print(head(reference_strain)) # print(head(reference_strain))
reference_results <- calculate_interaction_scores(reference_strain, max_conc, variables) reference_results <- calculate_interaction_scores(reference_strain, max_conc, variables)
print("Deletion strains:") # print("Deletion strains:")
print(head(deletion_strains)) # print(head(deletion_strains))
deletion_results <- calculate_interaction_scores(deletion_strains, max_conc, variables) deletion_results <- calculate_interaction_scores(deletion_strains, max_conc, variables)
zscores_calculations_reference <- reference_results$calculations zscores_calculations_reference <- reference_results$calculations
@@ -952,7 +901,7 @@ main <- function() {
generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_K", "Avg_Zscore_K", "K") generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_K", "Avg_Zscore_K", "K")
) )
generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots", generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots",
plot_configs = rank_plot_config, grid_layout = list(ncol = 3, nrow = 2)) plot_configs = rank_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
rank_lm_plot_config <- c( rank_lm_plot_config <- c(
generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_lm_L", "Z_lm_L", "L", is_lm = TRUE), generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_lm_L", "Z_lm_L", "L", is_lm = TRUE),