Break apart rank plot code

This commit is contained in:
2024-09-16 19:17:01 -04:00
parent 91fc9ecfda
commit 9fe45ba73f

View File

@@ -472,31 +472,32 @@ generate_scatter_plot <- function(plot, config) {
} else { } else {
plot <- plot + geom_point( plot <- plot + geom_point(
shape = config$shape %||% 3, shape = config$shape %||% 3,
position = if (!is.null(config$position) && config$position == "jitter") "jitter" else "identity" position = if (!is.null(config$position) && config$position == "jitter") "jitter" else "identity",
size = config$size %||% 0.1
) )
} }
# Add smooth line if specified # Add smooth line if specified
if (!is.null(config$add_smooth) && config$add_smooth) { if (!is.null(config$add_smooth) && config$add_smooth) {
plot <- if (!is.null(config$lm_line)) { if (!is.null(config$lm_line)) {
plot + geom_abline(intercept = config$lm_line$intercept, slope = config$lm_line$slope) plot <- plot + geom_abline(intercept = config$lm_line$intercept, slope = config$lm_line$slope, color = "blue")
} else { } else {
plot + geom_smooth(method = "lm", se = FALSE) plot <- plot + geom_smooth(method = "lm", se = FALSE, color = "blue")
} }
} }
# Add SD bands (iterate over sd_band only here) # Add SD bands if specified
if (!is.null(config$sd_band)) { if (!is.null(config$sd_band_values)) {
for (i in config$sd_band) { for (sd_band in config$sd_band_values) {
plot <- plot + plot <- plot +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = i, ymax = Inf, fill = "#542788", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = sd_band, ymax = Inf, fill = "#542788", alpha = 0.3) +
annotate("rect", xmin = -Inf, xmax = Inf, ymin = -i, ymax = -Inf, fill = "orange", alpha = 0.3) + annotate("rect", xmin = -Inf, xmax = Inf, ymin = -sd_band, ymax = -Inf, fill = "orange", alpha = 0.3) +
geom_hline(yintercept = c(-i, i), color = "gray") geom_hline(yintercept = c(-sd_band, sd_band), color = "gray")
} }
} }
# Add error bars if specified # Add error bars if specified
if (!is.null(config$error_bar) && config$error_bar) { if (!is.null(config$error_bar) && config$error_bar && !is.null(config$y_var)) {
y_mean_col <- paste0("mean_", config$y_var) y_mean_col <- paste0("mean_", config$y_var)
y_sd_col <- paste0("sd_", config$y_var) y_sd_col <- paste0("sd_", config$y_var)
plot <- plot + geom_errorbar( plot <- plot + geom_errorbar(
@@ -534,7 +535,8 @@ generate_scatter_plot <- function(plot, config) {
x = annotation$x, x = annotation$x,
y = annotation$y, y = annotation$y,
label = annotation$label, label = annotation$label,
na.rm = TRUE) na.rm = TRUE
)
} }
} }
@@ -674,78 +676,121 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA
df_filtered <- filter_data(df, variables, missing = TRUE) df_filtered <- filter_data(df, variables, missing = TRUE)
for (var in variables) { # Define SD bands
avg_zscore_col <- paste0("Avg_Zscore_", var) sd_bands <- c(1, 2, 3)
z_lm_col <- paste0("Z_lm_", var)
rank_col <- paste0("Rank_", var)
rank_lm_col <- paste0("Rank_lm_", var)
if (adjust) { # Define variables for Avg ZScore and Rank Avg ZScore plots
# Replace NA with 0.001 for interaction variables avg_zscore_vars <- c("r", "L", "K", "AUC")
df[[avg_zscore_col]] <- if_else(is.na(df[[avg_zscore_col]]), 0.001, df[[avg_zscore_col]])
df[[z_lm_col]] <- if_else(is.na(df[[z_lm_col]]), 0.001, df[[z_lm_col]])
}
# Compute ranks for interaction variables
df[[rank_col]] <- rank(df[[avg_zscore_col]], na.last = "keep")
df[[rank_lm_col]] <- rank(df[[z_lm_col]], na.last = "keep")
}
# Initialize list to store plot configurations # Initialize list to store plot configurations
configs <- list() configs <- list()
# Generate plot configurations for rank variables (L and K) with sd bands #### 1. SD-Based Plots for L and K ####
for (var in c("L", "K")) { for (var in c("L", "K")) {
if (is_lm) { for (sd_band in sd_bands) {
rank_var <- paste0("Rank_lm_", var)
zscore_var <- paste0("Z_lm_", var)
plot_title_prefix <- "Interaction Z score vs. Rank for"
} else {
rank_var <- paste0("Rank_", var)
zscore_var <- paste0("Avg_Zscore_", var)
plot_title_prefix <- "Average Z score vs. Rank for"
}
# Create plot configurations for each SD band # Determine columns based on whether it's lm or not
for (sd_band in c(1, 2, 3)) { if (is_lm) {
# Annotated version rank_var <- paste0(var, "_Rank_lm")
zscore_var <- paste0("Z_lm_", var)
y_label <- paste("Int Z score", var)
} else {
rank_var <- paste0(var, "_Rank")
zscore_var <- paste0("Avg_Zscore_", var)
y_label <- paste("Avg Z score", var)
}
# Annotated Plot Configuration
configs[[length(configs) + 1]] <- list( configs[[length(configs) + 1]] <- list(
df = df, df = df_filtered,
x_var = rank_var, x_var = rank_var,
y_var = zscore_var, y_var = zscore_var,
plot_type = "scatter", plot_type = "scatter",
title = paste(plot_title_prefix, var, "above", sd_band, "SD"), title = paste(y_label, "vs. Rank for", var, "above", sd_band, "SD"),
sd_band = sd_band, sd_band = sd_band,
enhancer_label = list( annotations = list(
x = nrow(df) / 2, list(
y = 10, x = median(df_filtered[[rank_var]], na.rm = TRUE),
label = paste("Deletion Enhancers =", nrow(df[df[[zscore_var]] >= sd_band, ])) y = 10,
), label = paste("Deletion Enhancers =", sum(df_filtered[[zscore_var]] >= sd_band, na.rm = TRUE))
suppressor_label = list( ),
x = nrow(df) / 2, list(
y = -10, x = median(df_filtered[[rank_var]], na.rm = TRUE),
label = paste("Deletion Suppressors =", nrow(df[df[[zscore_var]] <= -sd_band, ])) y = -10,
label = paste("Deletion Suppressors =", sum(df_filtered[[zscore_var]] <= -sd_band, na.rm = TRUE))
)
), ),
sd_band_values = sd_band,
shape = 3, shape = 3,
size = 0.1 size = 0.1
) )
# Non-annotated version (_notext) # Non-Annotated Plot Configuration
configs[[length(configs) + 1]] <- list( configs[[length(configs) + 1]] <- list(
df = df, df = df_filtered,
x_var = rank_var, x_var = rank_var,
y_var = zscore_var, y_var = zscore_var,
plot_type = "scatter", plot_type = "scatter",
title = paste(plot_title_prefix, var, "above", sd_band, "SD No Annotations"), title = paste(y_label, "vs. Rank for", var, "above", sd_band, "SD No Annotations"),
sd_band = sd_band, sd_band = sd_band,
enhancer_label = NULL, annotations = NULL,
suppressor_label = NULL, sd_band_values = sd_band,
shape = 3, shape = 3,
size = 0.1 size = 0.1
) )
} }
} }
#### 2. Avg ZScore and Rank Avg ZScore Plots for r, L, K, and AUC ####
for (var in avg_zscore_vars) {
for (plot_type in c("Avg_Zscore_vs_lm", "Rank_Avg_Zscore_vs_lm")) {
# Define x and y variables based on plot type
if (plot_type == "Avg_Zscore_vs_lm") {
x_var <- paste0("Avg_Zscore_", var)
y_var <- paste0("Z_lm_", var)
title_suffix <- paste("Avg Zscore vs lm", var)
} else if (plot_type == "Rank_Avg_Zscore_vs_lm") {
x_var <- paste0(var, "_Rank")
y_var <- paste0(var, "_Rank_lm")
title_suffix <- paste("Rank Avg Zscore vs lm", var)
}
# Determine y-axis label
if (plot_type == "Avg_Zscore_vs_lm") {
y_label <- paste("Z lm", var)
} else {
y_label <- paste("Rank lm", var)
}
# Determine correlation text (R-squared)
lm_fit <- lm(df_filtered[[y_var]] ~ df_filtered[[x_var]], data = df_filtered)
r_squared <- summary(lm_fit)$r.squared
# Plot Configuration
configs[[length(configs) + 1]] <- list(
df = df_filtered,
x_var = x_var,
y_var = y_var,
plot_type = "scatter",
title = title_suffix,
annotations = list(
list(
x = 0,
y = 0,
label = paste("R-squared =", round(r_squared, 2))
)
),
sd_band_values = NULL, # Not applicable
shape = 3,
size = 0.1,
add_smooth = TRUE,
lm_line = list(intercept = coef(lm_fit)[1], slope = coef(lm_fit)[2]),
legend_position = "right"
)
}
}
return(configs) return(configs)
} }
@@ -1043,15 +1088,15 @@ main <- function() {
) )
message("Generating quality control plots") message("Generating quality control plots")
# generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots) generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots)
# generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots) generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots)
# generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots) generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots)
# generate_and_save_plots(out_dir_qc, "plate_analysis", plate_analysis_plots) generate_and_save_plots(out_dir_qc, "plate_analysis", plate_analysis_plots)
# generate_and_save_plots(out_dir_qc, "plate_analysis_boxplots", plate_analysis_boxplots) generate_and_save_plots(out_dir_qc, "plate_analysis_boxplots", plate_analysis_boxplots)
# generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros", plate_analysis_no_zeros_plots) generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros", plate_analysis_no_zeros_plots)
# generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros_boxplots", plate_analysis_no_zeros_boxplots) generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros_boxplots", plate_analysis_no_zeros_boxplots)
# generate_and_save_plots(out_dir_qc, "L_vs_K_for_strains_2SD_outside_mean_K", l_outside_2sd_k_plots) generate_and_save_plots(out_dir_qc, "L_vs_K_for_strains_2SD_outside_mean_K", l_outside_2sd_k_plots)
# generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots) generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots)
# TODO: Originally this filtered L NA's # TODO: Originally this filtered L NA's
# Let's try to avoid for now since stats have already been calculated # Let's try to avoid for now since stats have already been calculated