Begin main() refactor

This commit is contained in:
2024-09-15 17:06:36 -04:00
parent 5dfa2d1650
commit ba7f575be1

View File

@@ -9,7 +9,6 @@ suppressMessages({
}) })
options(warn = 2) options(warn = 2)
options(width = 10000)
# Set the memory limit to 30GB (30 * 1024 * 1024 * 1024 bytes) # Set the memory limit to 30GB (30 * 1024 * 1024 * 1024 bytes)
soft_limit <- 30 * 1024 * 1024 * 1024 soft_limit <- 30 * 1024 * 1024 * 1024
@@ -127,7 +126,9 @@ load_and_process_data <- function(easy_results_file, sd = 3) {
SM = 0, SM = 0,
OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep), # should these be hardcoded? OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep), # should these be hardcoded?
conc_num = as.numeric(gsub("[^0-9\\.]", "", Conc)), conc_num = as.numeric(gsub("[^0-9\\.]", "", Conc)),
conc_num_factor = as.numeric(as.factor(conc_num)) - 1 conc_num_factor = as.factor(conc_num)
# conc_num_factor = factor(conc_num, levels = sort(unique(conc_num)))
# conc_num_numeric = as.numeric(conc_num_factor) - 1
) )
return(df) return(df)
@@ -334,8 +335,7 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
Z_lm_r = (lm_Score_r - mean(lm_Score_r, na.rm = TRUE)) / sd(lm_Score_r, na.rm = TRUE), Z_lm_r = (lm_Score_r - mean(lm_Score_r, na.rm = TRUE)) / sd(lm_Score_r, na.rm = TRUE),
Z_lm_AUC = (lm_Score_AUC - mean(lm_Score_AUC, na.rm = TRUE)) / sd(lm_Score_AUC, na.rm = TRUE) Z_lm_AUC = (lm_Score_AUC - mean(lm_Score_AUC, na.rm = TRUE)) / sd(lm_Score_AUC, na.rm = TRUE)
) %>% ) %>%
arrange(desc(Z_lm_L)) %>% arrange(desc(Z_lm_L), desc(NG))
arrange(desc(NG))
# Declare column order for output # Declare column order for output
calculations <- stats %>% calculations <- stats %>%
@@ -509,7 +509,7 @@ generate_scatter_plot <- function(plot, config) {
# Add x-axis customization if specified # Add x-axis customization if specified
if (!is.null(config$x_breaks) && !is.null(config$x_labels) && !is.null(config$x_label)) { if (!is.null(config$x_breaks) && !is.null(config$x_labels) && !is.null(config$x_label)) {
plot <- plot + scale_x_continuous( plot <- plot + scale_x_discrete(
name = config$x_label, name = config$x_label,
breaks = config$x_breaks, breaks = config$x_breaks,
labels = config$x_labels labels = config$x_labels
@@ -529,7 +529,11 @@ generate_scatter_plot <- function(plot, config) {
# Add annotations if specified # Add annotations if specified
if (!is.null(config$annotations)) { if (!is.null(config$annotations)) {
for (annotation in config$annotations) { for (annotation in config$annotations) {
plot <- plot + annotate("text", x = annotation$x, y = annotation$y, label = annotation$label) plot <- plot + annotate("text",
x = annotation$x,
y = annotation$y,
label = annotation$label,
na.rm = TRUE)
} }
} }
@@ -606,15 +610,6 @@ generate_interaction_plot_configs <- function(df, variables) {
AUC = c(-6500, 6500) AUC = c(-6500, 6500)
) )
# Define annotation positions and labels
annotation_positions <- list(
ZShift = 45,
lm_ZScore = 25,
NG = -25,
DB = -35,
SM = -45
)
# Define functions to generate annotation labels # Define functions to generate annotation labels
annotation_labels <- list( annotation_labels <- list(
ZShift = function(df, var) { ZShift = function(df, var) {
@@ -640,22 +635,6 @@ generate_interaction_plot_configs <- function(df, variables) {
slope = df[[paste0("lm_slope_", variable)]] slope = df[[paste0("lm_slope_", variable)]]
) )
# Generate annotations
annotations <- lapply(names(annotation_positions), function(annotation_name) {
y_pos <- annotation_positions[[annotation_name]]
label_func <- annotation_labels[[annotation_name]]
if (!is.null(label_func)) {
label <- label_func(df, variable)
list(x = 1, y = y_pos, label = label)
} else {
message(paste("Warning: No annotation function found for", annotation_name))
NULL
}
})
# Remove NULL annotations
annotations <- Filter(Negate(is.null), annotations)
# Filter the data based on y-limits and missing values # Filter the data based on y-limits and missing values
y_var_sym <- sym(variable) y_var_sym <- sym(variable)
x_var_sym <- sym("conc_num_factor") x_var_sym <- sym("conc_num_factor")
@@ -677,6 +656,39 @@ generate_interaction_plot_configs <- function(df, variables) {
filtered_data_list[[variable]] <- filtered_data filtered_data_list[[variable]] <- filtered_data
out_of_range_data_list[[variable]] <- data_to_filter_out out_of_range_data_list[[variable]] <- data_to_filter_out
# Calculate x and y positions for annotations based on filtered data
x_levels <- levels(filtered_data$conc_num_factor)
x_pos <- mean(seq_along(x_levels)) # Midpoint of x-axis
y_min <- min(ylim_vals)
y_max <- max(ylim_vals)
y_span <- y_max - y_min
# Adjust y positions as fractions of y-span
annotation_positions <- list(
ZShift = y_max - 0.1 * y_span,
lm_ZScore = y_max - 0.2 * y_span,
NG = y_min + 0.2 * y_span,
DB = y_min + 0.1 * y_span,
SM = y_min + 0.05 * y_span
)
# Generate annotations
annotations <- lapply(names(annotation_positions), function(annotation_name) {
y_pos <- annotation_positions[[annotation_name]]
label_func <- annotation_labels[[annotation_name]]
if (!is.null(label_func)) {
label <- label_func(df, variable)
list(x = x_pos, y = y_pos, label = label)
} else {
message(paste("Warning: No annotation function found for", annotation_name))
NULL
}
})
# Remove NULL annotations
annotations <- Filter(Negate(is.null), annotations)
# Create scatter plot config # Create scatter plot config
configs[[length(configs) + 1]] <- list( configs[[length(configs) + 1]] <- list(
df = filtered_data, df = filtered_data,
@@ -688,11 +700,11 @@ generate_interaction_plot_configs <- function(df, variables) {
annotations = annotations, annotations = annotations,
lm_line = lm_line, lm_line = lm_line,
error_bar = TRUE, error_bar = TRUE,
x_breaks = unique(df$conc_num_factor), x_breaks = levels(filtered_data$conc_num_factor),
x_labels = unique(as.character(df$conc_num)), x_labels = levels(filtered_data$conc_num_factor),
x_label = unique(df$Drug[1]), x_label = unique(df$Drug[1]),
position = "jitter", position = "jitter",
coord_cartesian = c(min(ylim_vals), max(ylim_vals)) coord_cartesian = ylim_vals # Use the actual y-limits
) )
# Create box plot config # Create box plot config
@@ -705,10 +717,10 @@ generate_interaction_plot_configs <- function(df, variables) {
ylim_vals = ylim_vals, ylim_vals = ylim_vals,
annotations = annotations, annotations = annotations,
error_bar = FALSE, error_bar = FALSE,
x_breaks = unique(df$conc_num_factor), x_breaks = unique(filtered_data$conc_num_factor),
x_labels = unique(as.character(df$conc_num)), x_labels = unique(as.character(filtered_data$conc_num)),
x_label = unique(df$Drug[1]), x_label = unique(df$Drug[1]),
coord_cartesian = c(min(ylim_vals), max(ylim_vals)) coord_cartesian = ylim_vals
) )
} }
@@ -718,6 +730,7 @@ generate_interaction_plot_configs <- function(df, variables) {
return(list( return(list(
configs = configs, configs = configs,
filtered_data = filtered_data_df,
out_of_range_data = out_of_range_data_df out_of_range_data = out_of_range_data_df
)) ))
} }
@@ -823,7 +836,7 @@ main <- function() {
df_no_zeros <- df_na %>% filter(L > 0) df_no_zeros <- df_na %>% filter(L > 0)
# Save some constants # Save some constants
max_conc <- max(df$conc_num_factor) max_conc <- max(df$conc_num)
l_half_median <- (median(df_above_tolerance$L, na.rm = TRUE)) / 2 l_half_median <- (median(df_above_tolerance$L, na.rm = TRUE)) / 2
k_half_median <- (median(df_above_tolerance$K, na.rm = TRUE)) / 2 k_half_median <- (median(df_above_tolerance$K, na.rm = TRUE)) / 2
@@ -901,7 +914,7 @@ main <- function() {
plot_type = "scatter", plot_type = "scatter",
delta_bg_point = TRUE, delta_bg_point = TRUE,
title = "Raw L vs K before quality control", title = "Raw L vs K before quality control",
color_var = "conc_num", color_var = "conc_num_factor",
error_bar = FALSE, error_bar = FALSE,
legend_position = "right" legend_position = "right"
) )
@@ -914,7 +927,7 @@ main <- function() {
y_var = NULL, y_var = NULL,
plot_type = "density", plot_type = "density",
title = "Plate analysis by Drug Conc for Delta Background before quality control", title = "Plate analysis by Drug Conc for Delta Background before quality control",
color_var = "conc_num", color_var = "conc_num_factor",
x_label = "Delta Background", x_label = "Delta Background",
y_label = "Density", y_label = "Density",
error_bar = FALSE, error_bar = FALSE,
@@ -925,7 +938,7 @@ main <- function() {
y_var = NULL, y_var = NULL,
plot_type = "bar", plot_type = "bar",
title = "Plate analysis by Drug Conc for Delta Background before quality control", title = "Plate analysis by Drug Conc for Delta Background before quality control",
color_var = "conc_num", color_var = "conc_num_factor",
x_label = "Delta Background", x_label = "Delta Background",
y_label = "Count", y_label = "Count",
error_bar = FALSE, error_bar = FALSE,
@@ -941,7 +954,7 @@ main <- function() {
delta_bg_point = TRUE, delta_bg_point = TRUE,
title = paste("Raw L vs K for strains above Delta Background threshold of", title = paste("Raw L vs K for strains above Delta Background threshold of",
df_above_tolerance$delta_bg_tolerance[[1]], "or above"), df_above_tolerance$delta_bg_tolerance[[1]], "or above"),
color_var = "conc_num", color_var = "conc_num_factor",
position = "jitter", position = "jitter",
annotations = list( annotations = list(
x = l_half_median, x = l_half_median,
@@ -969,7 +982,7 @@ main <- function() {
plot_type = "scatter", plot_type = "scatter",
title = paste("Plate analysis by Drug Conc for", var, stage, "quality control"), title = paste("Plate analysis by Drug Conc for", var, stage, "quality control"),
error_bar = TRUE, error_bar = TRUE,
color_var = "conc_num", color_var = "conc_num_factor",
position = "jitter") position = "jitter")
plate_analysis_plots <- append(plate_analysis_plots, list(config)) plate_analysis_plots <- append(plate_analysis_plots, list(config))
@@ -992,7 +1005,7 @@ main <- function() {
plot_type = "box", plot_type = "box",
title = paste("Plate analysis by Drug Conc for", var, stage, "quality control"), title = paste("Plate analysis by Drug Conc for", var, stage, "quality control"),
error_bar = FALSE, error_bar = FALSE,
color_var = "conc_num") color_var = "conc_num_factor")
plate_analysis_boxplots <- append(plate_analysis_boxplots, list(config)) plate_analysis_boxplots <- append(plate_analysis_boxplots, list(config))
} }
@@ -1007,7 +1020,7 @@ main <- function() {
plot_type = "scatter", plot_type = "scatter",
title = paste("Plate analysis by Drug Conc for", var, "after quality control"), title = paste("Plate analysis by Drug Conc for", var, "after quality control"),
error_bar = TRUE, error_bar = TRUE,
color_var = "conc_num", color_var = "conc_num_factor",
position = "jitter") position = "jitter")
plate_analysis_no_zeros_plots <- append(plate_analysis_no_zeros_plots, list(config)) plate_analysis_no_zeros_plots <- append(plate_analysis_no_zeros_plots, list(config))
@@ -1022,7 +1035,7 @@ main <- function() {
plot_type = "box", plot_type = "box",
title = paste("Plate analysis by Drug Conc for", var, "after quality control"), title = paste("Plate analysis by Drug Conc for", var, "after quality control"),
error_bar = FALSE, error_bar = FALSE,
color_var = "conc_num" color_var = "conc_num_factor"
) )
plate_analysis_no_zeros_boxplots <- append(plate_analysis_no_zeros_boxplots, list(config)) plate_analysis_no_zeros_boxplots <- append(plate_analysis_no_zeros_boxplots, list(config))
} }
@@ -1035,7 +1048,7 @@ main <- function() {
plot_type = "scatter", plot_type = "scatter",
delta_bg_point = TRUE, delta_bg_point = TRUE,
title = "Raw L vs K for strains falling outside 2SD of the K mean at each Conc", title = "Raw L vs K for strains falling outside 2SD of the K mean at each Conc",
color_var = "conc_num", color_var = "conc_num_factor",
position = "jitter", position = "jitter",
legend_position = "right" legend_position = "right"
) )
@@ -1049,26 +1062,22 @@ main <- function() {
plot_type = "scatter", plot_type = "scatter",
gene_point = TRUE, gene_point = TRUE,
title = "Delta Background vs K for strains falling outside 2SD of the K mean at each Conc", title = "Delta Background vs K for strains falling outside 2SD of the K mean at each Conc",
color_var = "conc_num", color_var = "conc_num_factor",
position = "jitter", position = "jitter",
legend_position = "right" legend_position = "right"
) )
) )
# message("Generating quality control plots") message("Generating quality control plots")
# generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots) generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots)
# generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots) generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots)
# generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots) generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots)
# generate_and_save_plots(out_dir_qc, "plate_analysis", plate_analysis_plots) generate_and_save_plots(out_dir_qc, "plate_analysis", plate_analysis_plots)
# generate_and_save_plots(out_dir_qc, "plate_analysis_boxplots", plate_analysis_boxplots) generate_and_save_plots(out_dir_qc, "plate_analysis_boxplots", plate_analysis_boxplots)
# generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros", plate_analysis_no_zeros_plots) generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros", plate_analysis_no_zeros_plots)
# generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros_boxplots", plate_analysis_no_zeros_boxplots) generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros_boxplots", plate_analysis_no_zeros_boxplots)
# generate_and_save_plots(out_dir_qc, "L_vs_K_for_strains_2SD_outside_mean_K", l_outside_2sd_k_plots) generate_and_save_plots(out_dir_qc, "L_vs_K_for_strains_2SD_outside_mean_K", l_outside_2sd_k_plots)
# generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots) generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots)
# Clean up
rm(df, df_above_tolerance, df_no_zeros, df_no_zeros_stats, df_no_zeros_filtered_stats, ss)
gc()
# TODO: Originally this filtered L NA's # TODO: Originally this filtered L NA's
# Let's try to avoid for now since stats have already been calculated # Let's try to avoid for now since stats have already been calculated
@@ -1160,7 +1169,8 @@ main <- function() {
results <- generate_interaction_plot_configs(zscores_interactions_reference_joined, interaction_vars) results <- generate_interaction_plot_configs(zscores_interactions_reference_joined, interaction_vars)
if (nrow(results$out_of_range_data) > 0) { if (nrow(results$out_of_range_data) > 0) {
message("Out-of-range data:") message("Out-of-range data:")
print(results$out_of_range_data) print(results$out_of_range_data %>% select("OrfRep", "Gene", "num",
"conc_num", "conc_num_factor", config$x_var, config$y_var))
} }
reference_plot_configs <- results$configs reference_plot_configs <- results$configs
generate_and_save_plots(out_dir, "RF_interactionPlots", reference_plot_configs, grid_layout = list(ncol = 4, nrow = 3)) generate_and_save_plots(out_dir, "RF_interactionPlots", reference_plot_configs, grid_layout = list(ncol = 4, nrow = 3))