diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 1acde20c..913aea91 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -9,7 +9,6 @@ suppressMessages({ }) options(warn = 2) -options(width = 10000) # Set the memory limit to 30GB (30 * 1024 * 1024 * 1024 bytes) soft_limit <- 30 * 1024 * 1024 * 1024 @@ -127,7 +126,9 @@ load_and_process_data <- function(easy_results_file, sd = 3) { SM = 0, OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep), # should these be hardcoded? conc_num = as.numeric(gsub("[^0-9\\.]", "", Conc)), - conc_num_factor = as.numeric(as.factor(conc_num)) - 1 + conc_num_factor = as.factor(conc_num) + # conc_num_factor = factor(conc_num, levels = sort(unique(conc_num))) + # conc_num_numeric = as.numeric(conc_num_factor) - 1 ) return(df) @@ -334,8 +335,7 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c Z_lm_r = (lm_Score_r - mean(lm_Score_r, na.rm = TRUE)) / sd(lm_Score_r, na.rm = TRUE), Z_lm_AUC = (lm_Score_AUC - mean(lm_Score_AUC, na.rm = TRUE)) / sd(lm_Score_AUC, na.rm = TRUE) ) %>% - arrange(desc(Z_lm_L)) %>% - arrange(desc(NG)) + arrange(desc(Z_lm_L), desc(NG)) # Declare column order for output calculations <- stats %>% @@ -509,7 +509,7 @@ generate_scatter_plot <- function(plot, config) { # Add x-axis customization if specified if (!is.null(config$x_breaks) && !is.null(config$x_labels) && !is.null(config$x_label)) { - plot <- plot + scale_x_continuous( + plot <- plot + scale_x_discrete( name = config$x_label, breaks = config$x_breaks, labels = config$x_labels @@ -529,7 +529,11 @@ generate_scatter_plot <- function(plot, config) { # Add annotations if specified if (!is.null(config$annotations)) { for (annotation in config$annotations) { - plot <- plot + annotate("text", x = annotation$x, y = annotation$y, label = annotation$label) + plot <- plot + annotate("text", + x = annotation$x, + y = annotation$y, + label = annotation$label, + na.rm = TRUE) } } @@ -606,15 +610,6 @@ generate_interaction_plot_configs <- function(df, variables) { AUC = c(-6500, 6500) ) - # Define annotation positions and labels - annotation_positions <- list( - ZShift = 45, - lm_ZScore = 25, - NG = -25, - DB = -35, - SM = -45 - ) - # Define functions to generate annotation labels annotation_labels <- list( ZShift = function(df, var) { @@ -640,22 +635,6 @@ generate_interaction_plot_configs <- function(df, variables) { slope = df[[paste0("lm_slope_", variable)]] ) - # Generate annotations - annotations <- lapply(names(annotation_positions), function(annotation_name) { - y_pos <- annotation_positions[[annotation_name]] - label_func <- annotation_labels[[annotation_name]] - if (!is.null(label_func)) { - label <- label_func(df, variable) - list(x = 1, y = y_pos, label = label) - } else { - message(paste("Warning: No annotation function found for", annotation_name)) - NULL - } - }) - - # Remove NULL annotations - annotations <- Filter(Negate(is.null), annotations) - # Filter the data based on y-limits and missing values y_var_sym <- sym(variable) x_var_sym <- sym("conc_num_factor") @@ -677,6 +656,39 @@ generate_interaction_plot_configs <- function(df, variables) { filtered_data_list[[variable]] <- filtered_data out_of_range_data_list[[variable]] <- data_to_filter_out + # Calculate x and y positions for annotations based on filtered data + x_levels <- levels(filtered_data$conc_num_factor) + x_pos <- mean(seq_along(x_levels)) # Midpoint of x-axis + + y_min <- min(ylim_vals) + y_max <- max(ylim_vals) + y_span <- y_max - y_min + + # Adjust y positions as fractions of y-span + annotation_positions <- list( + ZShift = y_max - 0.1 * y_span, + lm_ZScore = y_max - 0.2 * y_span, + NG = y_min + 0.2 * y_span, + DB = y_min + 0.1 * y_span, + SM = y_min + 0.05 * y_span + ) + + # Generate annotations + annotations <- lapply(names(annotation_positions), function(annotation_name) { + y_pos <- annotation_positions[[annotation_name]] + label_func <- annotation_labels[[annotation_name]] + if (!is.null(label_func)) { + label <- label_func(df, variable) + list(x = x_pos, y = y_pos, label = label) + } else { + message(paste("Warning: No annotation function found for", annotation_name)) + NULL + } + }) + + # Remove NULL annotations + annotations <- Filter(Negate(is.null), annotations) + # Create scatter plot config configs[[length(configs) + 1]] <- list( df = filtered_data, @@ -688,11 +700,11 @@ generate_interaction_plot_configs <- function(df, variables) { annotations = annotations, lm_line = lm_line, error_bar = TRUE, - x_breaks = unique(df$conc_num_factor), - x_labels = unique(as.character(df$conc_num)), + x_breaks = levels(filtered_data$conc_num_factor), + x_labels = levels(filtered_data$conc_num_factor), x_label = unique(df$Drug[1]), position = "jitter", - coord_cartesian = c(min(ylim_vals), max(ylim_vals)) + coord_cartesian = ylim_vals # Use the actual y-limits ) # Create box plot config @@ -705,10 +717,10 @@ generate_interaction_plot_configs <- function(df, variables) { ylim_vals = ylim_vals, annotations = annotations, error_bar = FALSE, - x_breaks = unique(df$conc_num_factor), - x_labels = unique(as.character(df$conc_num)), + x_breaks = unique(filtered_data$conc_num_factor), + x_labels = unique(as.character(filtered_data$conc_num)), x_label = unique(df$Drug[1]), - coord_cartesian = c(min(ylim_vals), max(ylim_vals)) + coord_cartesian = ylim_vals ) } @@ -718,6 +730,7 @@ generate_interaction_plot_configs <- function(df, variables) { return(list( configs = configs, + filtered_data = filtered_data_df, out_of_range_data = out_of_range_data_df )) } @@ -823,7 +836,7 @@ main <- function() { df_no_zeros <- df_na %>% filter(L > 0) # Save some constants - max_conc <- max(df$conc_num_factor) + max_conc <- max(df$conc_num) l_half_median <- (median(df_above_tolerance$L, na.rm = TRUE)) / 2 k_half_median <- (median(df_above_tolerance$K, na.rm = TRUE)) / 2 @@ -901,7 +914,7 @@ main <- function() { plot_type = "scatter", delta_bg_point = TRUE, title = "Raw L vs K before quality control", - color_var = "conc_num", + color_var = "conc_num_factor", error_bar = FALSE, legend_position = "right" ) @@ -914,7 +927,7 @@ main <- function() { y_var = NULL, plot_type = "density", title = "Plate analysis by Drug Conc for Delta Background before quality control", - color_var = "conc_num", + color_var = "conc_num_factor", x_label = "Delta Background", y_label = "Density", error_bar = FALSE, @@ -925,7 +938,7 @@ main <- function() { y_var = NULL, plot_type = "bar", title = "Plate analysis by Drug Conc for Delta Background before quality control", - color_var = "conc_num", + color_var = "conc_num_factor", x_label = "Delta Background", y_label = "Count", error_bar = FALSE, @@ -941,7 +954,7 @@ main <- function() { delta_bg_point = TRUE, title = paste("Raw L vs K for strains above Delta Background threshold of", df_above_tolerance$delta_bg_tolerance[[1]], "or above"), - color_var = "conc_num", + color_var = "conc_num_factor", position = "jitter", annotations = list( x = l_half_median, @@ -969,7 +982,7 @@ main <- function() { plot_type = "scatter", title = paste("Plate analysis by Drug Conc for", var, stage, "quality control"), error_bar = TRUE, - color_var = "conc_num", + color_var = "conc_num_factor", position = "jitter") plate_analysis_plots <- append(plate_analysis_plots, list(config)) @@ -992,7 +1005,7 @@ main <- function() { plot_type = "box", title = paste("Plate analysis by Drug Conc for", var, stage, "quality control"), error_bar = FALSE, - color_var = "conc_num") + color_var = "conc_num_factor") plate_analysis_boxplots <- append(plate_analysis_boxplots, list(config)) } @@ -1007,7 +1020,7 @@ main <- function() { plot_type = "scatter", title = paste("Plate analysis by Drug Conc for", var, "after quality control"), error_bar = TRUE, - color_var = "conc_num", + color_var = "conc_num_factor", position = "jitter") plate_analysis_no_zeros_plots <- append(plate_analysis_no_zeros_plots, list(config)) @@ -1022,7 +1035,7 @@ main <- function() { plot_type = "box", title = paste("Plate analysis by Drug Conc for", var, "after quality control"), error_bar = FALSE, - color_var = "conc_num" + color_var = "conc_num_factor" ) plate_analysis_no_zeros_boxplots <- append(plate_analysis_no_zeros_boxplots, list(config)) } @@ -1035,7 +1048,7 @@ main <- function() { plot_type = "scatter", delta_bg_point = TRUE, title = "Raw L vs K for strains falling outside 2SD of the K mean at each Conc", - color_var = "conc_num", + color_var = "conc_num_factor", position = "jitter", legend_position = "right" ) @@ -1049,26 +1062,22 @@ main <- function() { plot_type = "scatter", gene_point = TRUE, title = "Delta Background vs K for strains falling outside 2SD of the K mean at each Conc", - color_var = "conc_num", + color_var = "conc_num_factor", position = "jitter", legend_position = "right" ) ) - # message("Generating quality control plots") - # generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots) - # generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots) - # generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots) - # generate_and_save_plots(out_dir_qc, "plate_analysis", plate_analysis_plots) - # generate_and_save_plots(out_dir_qc, "plate_analysis_boxplots", plate_analysis_boxplots) - # generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros", plate_analysis_no_zeros_plots) - # generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros_boxplots", plate_analysis_no_zeros_boxplots) - # generate_and_save_plots(out_dir_qc, "L_vs_K_for_strains_2SD_outside_mean_K", l_outside_2sd_k_plots) - # generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots) - - # Clean up - rm(df, df_above_tolerance, df_no_zeros, df_no_zeros_stats, df_no_zeros_filtered_stats, ss) - gc() + message("Generating quality control plots") + generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots) + generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots) + generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots) + generate_and_save_plots(out_dir_qc, "plate_analysis", plate_analysis_plots) + generate_and_save_plots(out_dir_qc, "plate_analysis_boxplots", plate_analysis_boxplots) + generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros", plate_analysis_no_zeros_plots) + generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros_boxplots", plate_analysis_no_zeros_boxplots) + generate_and_save_plots(out_dir_qc, "L_vs_K_for_strains_2SD_outside_mean_K", l_outside_2sd_k_plots) + generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots) # TODO: Originally this filtered L NA's # Let's try to avoid for now since stats have already been calculated @@ -1160,7 +1169,8 @@ main <- function() { results <- generate_interaction_plot_configs(zscores_interactions_reference_joined, interaction_vars) if (nrow(results$out_of_range_data) > 0) { message("Out-of-range data:") - print(results$out_of_range_data) + print(results$out_of_range_data %>% select("OrfRep", "Gene", "num", + "conc_num", "conc_num_factor", config$x_var, config$y_var)) } reference_plot_configs <- results$configs generate_and_save_plots(out_dir, "RF_interactionPlots", reference_plot_configs, grid_layout = list(ncol = 4, nrow = 3))