Break out interaction plots

This commit is contained in:
2024-09-05 00:08:07 -04:00
parent 10660e40d0
commit 75ba5939a3

View File

@@ -205,8 +205,6 @@ calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "c
return(summary_stats)
}
calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) {
# Calculate total concentration variables
@@ -329,6 +327,37 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
return(list(zscores_calculations = interaction_scores_all, zscores_interactions = interaction_scores))
}
interaction_plot_configs <- function(df, variable) {
ylim_vals <- switch(variable,
"L" = c(-65, 65),
"K" = c(-65, 65),
"r" = c(-0.65, 0.65),
"AUC" = c(-6500, 6500),
NULL
)
wt_sd <- paste0("WT_sd_", variable)
delta_var <- paste0("Delta_", variable)
z_shift <- paste0("Z_Shift_", variable)
z_lm <- paste0("Z_lm_", variable)
list(
x_var = "conc_num_factor",
y_var = delta_var,
plot_type = "scatter",
title = paste("Scatter plot for", variable),
ylim_vals = ylim_vals,
annotations = list(
list(x = 1, y = max(ylim_vals) * 0.7, label = paste("ZShift =", round(df[[z_shift]], 2))),
list(x = 1, y = max(ylim_vals) * 0.5, label = paste("Z lm Score =", round(df[[z_lm]], 2))),
list(x = 1, y = min(ylim_vals) * 0.7, label = paste("NG =", df$NG)),
list(x = 1, y = min(ylim_vals) * 0.85, label = paste("DB =", df$DB)),
list(x = 1, y = min(ylim_vals) * 1.1, label = paste("SM =", df$SM))
)
)
}
generate_plot <- function(df, x_var, y_var = NULL, plot_type, color_var = "conc_num",
title, x_label = NULL, y_label = NULL, ylim_vals = NULL) {
@@ -360,7 +389,7 @@ generate_plot <- function(df, x_var, y_var = NULL, plot_type, color_var = "conc_
return(plot)
}
generate_and_save_plots <- function(df, output_dir, file_prefix, variables, plot_type = "scatter", include_qc = FALSE, ylim_vals = NULL) {
generate_and_save_plots <- function(df, output_dir, file_prefix, plot_configs) {
plots <- list()
if (nrow(df) == 0) {
@@ -370,29 +399,35 @@ generate_and_save_plots <- function(df, output_dir, file_prefix, variables, plot
message("Generating plots for \"", deparse(substitute(df)), "\" dataframe")
# Create plots for the given variables
for (var in variables) {
for (config in plot_configs) {
plot <- generate_plot(
df = df,
x_var = "scan",
y_var = var,
plot_type = plot_type,
title = paste(file_prefix, "Plot for", var),
ylim_vals = ylim_vals
x_var = config$x_var,
y_var = config$y_var,
plot_type = config$plot_type,
title = config$title,
ylim_vals = config$ylim_vals
)
plots[[paste0(var, "_", plot_type)]] <- plot
# If custom annotations for interaction plots are required
if (!is.null(config$annotations)) {
for (annotation in config$annotations) {
plot <- plot +
annotate(
"text", x = annotation$x, y = annotation$y, label = annotation$label
)
}
}
# Store the plot with the variable name and plot type
plots[[paste0(config$y_var, "_", config$plot_type)]] <- plot
}
# Include additional QC plots if requested
if (include_qc) {
plots[["Raw_L_vs_K"]] <- generate_plot(df, "L", "K", "scatter", title = "Raw L vs K before QC")
plots[["Delta_bg_Density"]] <- generate_plot(df, "delta_bg", NULL, "density", title = "Density plot for Delta Background")
plots[["Delta_bg_Bar"]] <- generate_plot(df, "delta_bg", NULL, "bar", title = "Bar plot for Delta Background")
}
save_plots(file_prefix, plots, output_dir)
}
# Ensure all plots are saved and printed to PDF
save_plots <- function(file_name, plot_list, output_dir) {
# Save to PDF
@@ -613,9 +648,22 @@ main <- function() {
# QC steps and filtering
df_above_tolerance <- df %>% filter(DB == 1)
df_na <- df %>% mutate(across(c(L, r, AUC, K), ~ ifelse(DB == 1, NA, .x)))
df_na <- df %>% mutate(across(c(L, r, AUC, K), ~ ifelse(DB == 1, NA, .)))
df_no_zeros <- df_na %>% filter(L > 0)
# Calculate the half-medians for `L` and `K` for rows above tolerance
L_half_median <- (median(df_above_tolerance$L, na.rm = TRUE)) / 2
K_half_median <- (median(df_above_tolerance$K, na.rm = TRUE)) / 2
# Get the number of rows that are above tolerance
rows_to_remove <- nrow(df_above_tolerance)
# Logging or handling the calculated values, e.g.:
message("Half-median for L (above tolerance): ", L_half_median)
message("Half-median for K (above tolerance): ", K_half_median)
message("Number of rows above tolerance: ", rows_to_remove)
# Additional filtering for non-finite values in df_na
df_na_filtered <- df_na %>%
filter(if_any(c(L), ~ !is.finite(.))) %>%
{
@@ -626,15 +674,36 @@ main <- function() {
df_na %>% filter(if_all(c(L), is.finite))
}
# Generate and save QC plots using the new generalized function
# message("Generating QC plots")
# variables <- c("L", "K", "r", "AUC", "delta_bg")
# generate_and_save_plots(df, out_dir_qc, "Before_QC", variables, include_qc = TRUE)
# generate_and_save_plots(df_above_tolerance, out_dir_qc, "Raw_L_vs_K_above_delta_bg_threshold", variables, include_qc = TRUE)
# generate_and_save_plots(df_na_filtered, out_dir_qc, "After_QC", variables)
# generate_and_save_plots(df_no_zeros, out_dir_qc, "No_Zeros", variables)
# Define the plot configurations for each QC step
qc_plot_configs <- list(
list(x_var = "L", y_var = "K", plot_type = "scatter", title = "Raw L vs K before QC", ylim_vals = NULL),
list(x_var = "delta_bg", y_var = NULL, plot_type = "density", title = "Density plot for Delta Background", ylim_vals = NULL),
list(x_var = "delta_bg", y_var = NULL, plot_type = "bar", title = "Bar plot for Delta Background", ylim_vals = NULL)
)
above_tolerance_plot_configs <- list(
list(x_var = "L", y_var = "K", plot_type = "scatter", title = "L vs K for Rows Above Delta Background Threshold", ylim_vals = NULL)
)
after_qc_plot_configs <- list(
list(x_var = "L", y_var = "K", plot_type = "scatter", title = "L vs K After QC Filtering", ylim_vals = NULL)
)
no_zeros_plot_configs <- list(
list(x_var = "L", y_var = "K", plot_type = "scatter", title = "L vs K for Non-Zero L Values", ylim_vals = NULL)
)
# Generate and save plots for each QC step
message("Generating QC plots")
generate_and_save_plots(df, out_dir_qc, "Before_QC", qc_plot_configs)
generate_and_save_plots(df_above_tolerance, out_dir_qc, "Above_Tolerance", above_tolerance_plot_configs)
generate_and_save_plots(df_na_filtered, out_dir_qc, "After_QC", after_qc_plot_configs)
generate_and_save_plots(df_no_zeros, out_dir_qc, "No_Zeros", no_zeros_plot_configs)
# Clean up
rm(df, df_above_tolerance, df_no_zeros)
rm(df, df_above_tolerance, df_no_zeros)
# Calculate summary statistics
message("Calculating summary statistics for all strains")
@@ -790,11 +859,20 @@ main <- function() {
write.csv(suppressors_lm_K,
file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_K_lm.csv"), row.names = FALSE)
# Generate plots for interaction scores
# generate_rf_plots(zscores_calculations_reference, zscores_interactions_reference, out_dir)
# generate_rf_plots(zscores_calculations, zscores_interactions, out_dir)
generate_and_save_plots(zscores_calculations_reference, out_dir, "Reference_Calculations", variables)
generate_and_save_plots(zscores_calculations, out_dir, "Deletion_Calculations", variables)
# Interaction plots for reference strain
variables <- c("L", "K", "r", "AUC")
reference_plot_configs <- lapply(variables, function(var) {
interaction_plot_configs(zscores_calculations_reference, var)
})
generate_and_save_plots(zscores_calculations_reference, out_dir, "RF_InteractionPlots", reference_plot_configs)
# Interaction plots for deletion strains
deletion_plot_configs <- lapply(variables, function(var) {
interaction_plot_configs(zscores_calculations, var)
})
generate_and_save_plots(zscores_calculations, out_dir, "InteractionPlots", deletion_plot_configs)
# Apply filtering to remove NA values before further analysis
zscores_interactions_filtered <- zscores_interactions %>%