diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 18436a59..3a2c8d9d 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -281,9 +281,12 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c lm_AUC <- lm(Delta_AUC ~ conc_num_factor, data = stats) interactions <- stats %>% - transmute( + summarise( OrfRep = first(OrfRep), Gene = first(Gene), + num = first(num), + conc_num = first(conc_num), + conc_num_factor = first(conc_num_factor), Raw_Shift_L = first(Raw_Shift_L), Raw_Shift_K = first(Raw_Shift_K), Raw_Shift_r = first(Raw_Shift_r), @@ -352,9 +355,26 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c calculations_joined <- df %>% select(-any_of(setdiff(names(calculations), c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor")))) calculations_joined <- left_join(calculations_joined, calculations, by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor")) + + + + # # TODO for debug + # df_duplicates <- df %>% + # group_by(OrfRep, Gene, num) %>% + # filter(n() > 1) + + # interactions_duplicates <- interactions %>% + # group_by(OrfRep, Gene, num) %>% + # filter(n() > 1) + + # print(df_duplicates) + # print(interactions_duplicates) + - interactions_joined <- df %>% select(-any_of(setdiff(names(interactions), c("OrfRep", "Gene", "num")))) - interactions_joined <- left_join(interactions_joined, interactions, by = c("OrfRep", "Gene", "num")) + + + interactions_joined <- df %>% select(-any_of(setdiff(names(interactions), c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor")))) + interactions_joined <- left_join(interactions_joined, interactions, by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor")) return(list(calculations = calculations, interactions = interactions, interactions_joined = interactions_joined, calculations_joined = calculations_joined)) @@ -420,7 +440,20 @@ generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_la } generate_scatter_plot <- function(plot, config, interactive = FALSE) { - + # Check for missing or out-of-range data + missing_data <- config$df %>% + filter( + is.na(!!sym(config$x_var)) | is.na(!!sym(config$y_var)) | + !!sym(config$y_var) < min(config$ylim_vals, na.rm = TRUE) | + !!sym(config$y_var) > max(config$ylim_vals, na.rm = TRUE) + ) + + # Print the rows with missing or out-of-range data if any + if (nrow(missing_data) > 0) { + message("Missing or out-of-range data for ", config$title, ":") + print(missing_data %>% select(any_of(c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor", config$x_var, config$y_var))), n = 100) + } + # Add the interactive `text` aesthetic if `interactive` is TRUE if (interactive) { plot <- if (!is.null(config$delta_bg_point) && config$delta_bg_point) { @@ -475,34 +508,16 @@ generate_scatter_plot <- function(plot, config, interactive = FALSE) { labels = config$x_labels) } - # Add y-axis limits if specified + # Use coord_cartesian for zooming in without removing data outside the range + if (!is.null(config$coord_cartesian)) { + plot <- plot + coord_cartesian(ylim = config$coord_cartesian) + } + + # Use scale_y_continuous for setting the y-axis limits if (!is.null(config$ylim_vals)) { plot <- plot + scale_y_continuous(limits = config$ylim_vals) } - # Add Cartesian coordinates customization if specified - if (!is.null(config$coord_cartesian)) { - plot <- plot + coord_cartesian(ylim = config$coord_cartesian) - } - - return(plot) -} - -generate_box_plot <- function(plot, config) { - plot <- plot + geom_boxplot() - - if (!is.null(config$x_breaks) && !is.null(config$x_labels) && !is.null(config$x_label)) { - plot <- plot + scale_x_discrete( - name = config$x_label, - breaks = config$x_breaks, - labels = config$x_labels - ) - } - - if (!is.null(config$coord_cartesian)) { - plot <- plot + coord_cartesian(ylim = config$coord_cartesian) - } - return(plot) } @@ -562,14 +577,13 @@ generate_interaction_plot_configs <- function(df, variables) { # Dynamically generate the names of the columns var_info <- list( ylim = limits_map[[variable]], - lm_model = df[[paste0("lm_", variable)]][[1]], - sd_col = paste0("WT_sd_", variable), + sd_col = paste0("WT_sd_", variable) ) # Extract the precomputed linear model coefficients lm_line <- list( - intercept = coef(var_info$lm_model)[1], - slope = coef(var_info$lm_model)[2] + intercept = df[[paste0("lm_intercept_", variable)]], + slope = df[[paste0("lm_slope_", variable)]] ) annotations <- lapply(names(annotation_positions[[variable]]), function(annotation_name) { @@ -669,7 +683,6 @@ generate_rank_plot_configs <- function(df, rank_var, zscore_var, var, is_lm = FA return(configs) } - generate_correlation_plot_configs <- function(df, variables) { configs <- list() @@ -960,16 +973,16 @@ main <- function() { ) ) - message("Generating quality control plots") - generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots) - generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots) - generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots) - generate_and_save_plots(out_dir_qc, "plate_analysis", plate_analysis_plots) - generate_and_save_plots(out_dir_qc, "plate_analysis_boxplots", plate_analysis_boxplots) - generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros", plate_analysis_no_zeros_plots) - generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros_boxplots", plate_analysis_no_zeros_boxplots) - generate_and_save_plots(out_dir_qc, "L_vs_K_for_strains_2SD_outside_mean_K", l_outside_2sd_k_plots) - generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots) + # message("Generating quality control plots") + # generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots) + # generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots) + # generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots) + # generate_and_save_plots(out_dir_qc, "plate_analysis", plate_analysis_plots) + # generate_and_save_plots(out_dir_qc, "plate_analysis_boxplots", plate_analysis_boxplots) + # generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros", plate_analysis_no_zeros_plots) + # generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros_boxplots", plate_analysis_no_zeros_boxplots) + # generate_and_save_plots(out_dir_qc, "L_vs_K_for_strains_2SD_outside_mean_K", l_outside_2sd_k_plots) + # generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots) # Clean up rm(df, df_above_tolerance, df_no_zeros, df_no_zeros_stats, df_no_zeros_filtered_stats, ss)