diff --git a/workflow/apps/r/calculate_interaction_zscores5.R b/workflow/apps/r/calculate_interaction_zscores5.R index e4efb574..0a91dab2 100644 --- a/workflow/apps/r/calculate_interaction_zscores5.R +++ b/workflow/apps/r/calculate_interaction_zscores5.R @@ -205,8 +205,6 @@ calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "c return(summary_stats) } - - calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) { # Calculate total concentration variables @@ -329,6 +327,37 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c return(list(zscores_calculations = interaction_scores_all, zscores_interactions = interaction_scores)) } +interaction_plot_configs <- function(df, variable) { + ylim_vals <- switch(variable, + "L" = c(-65, 65), + "K" = c(-65, 65), + "r" = c(-0.65, 0.65), + "AUC" = c(-6500, 6500), + NULL + ) + + wt_sd <- paste0("WT_sd_", variable) + delta_var <- paste0("Delta_", variable) + z_shift <- paste0("Z_Shift_", variable) + z_lm <- paste0("Z_lm_", variable) + + list( + x_var = "conc_num_factor", + y_var = delta_var, + plot_type = "scatter", + title = paste("Scatter plot for", variable), + ylim_vals = ylim_vals, + annotations = list( + list(x = 1, y = max(ylim_vals) * 0.7, label = paste("ZShift =", round(df[[z_shift]], 2))), + list(x = 1, y = max(ylim_vals) * 0.5, label = paste("Z lm Score =", round(df[[z_lm]], 2))), + list(x = 1, y = min(ylim_vals) * 0.7, label = paste("NG =", df$NG)), + list(x = 1, y = min(ylim_vals) * 0.85, label = paste("DB =", df$DB)), + list(x = 1, y = min(ylim_vals) * 1.1, label = paste("SM =", df$SM)) + ) + ) +} + + generate_plot <- function(df, x_var, y_var = NULL, plot_type, color_var = "conc_num", title, x_label = NULL, y_label = NULL, ylim_vals = NULL) { @@ -360,7 +389,7 @@ generate_plot <- function(df, x_var, y_var = NULL, plot_type, color_var = "conc_ return(plot) } -generate_and_save_plots <- function(df, output_dir, file_prefix, variables, plot_type = "scatter", include_qc = FALSE, ylim_vals = NULL) { +generate_and_save_plots <- function(df, output_dir, file_prefix, plot_configs) { plots <- list() if (nrow(df) == 0) { @@ -370,29 +399,35 @@ generate_and_save_plots <- function(df, output_dir, file_prefix, variables, plot message("Generating plots for \"", deparse(substitute(df)), "\" dataframe") - # Create plots for the given variables - for (var in variables) { + for (config in plot_configs) { plot <- generate_plot( df = df, - x_var = "scan", - y_var = var, - plot_type = plot_type, - title = paste(file_prefix, "Plot for", var), - ylim_vals = ylim_vals + x_var = config$x_var, + y_var = config$y_var, + plot_type = config$plot_type, + title = config$title, + ylim_vals = config$ylim_vals ) - plots[[paste0(var, "_", plot_type)]] <- plot + + # If custom annotations for interaction plots are required + if (!is.null(config$annotations)) { + for (annotation in config$annotations) { + plot <- plot + + annotate( + "text", x = annotation$x, y = annotation$y, label = annotation$label + ) + } + } + + # Store the plot with the variable name and plot type + plots[[paste0(config$y_var, "_", config$plot_type)]] <- plot } - - # Include additional QC plots if requested - if (include_qc) { - plots[["Raw_L_vs_K"]] <- generate_plot(df, "L", "K", "scatter", title = "Raw L vs K before QC") - plots[["Delta_bg_Density"]] <- generate_plot(df, "delta_bg", NULL, "density", title = "Density plot for Delta Background") - plots[["Delta_bg_Bar"]] <- generate_plot(df, "delta_bg", NULL, "bar", title = "Bar plot for Delta Background") - } - + save_plots(file_prefix, plots, output_dir) } + + # Ensure all plots are saved and printed to PDF save_plots <- function(file_name, plot_list, output_dir) { # Save to PDF @@ -613,9 +648,22 @@ main <- function() { # QC steps and filtering df_above_tolerance <- df %>% filter(DB == 1) - df_na <- df %>% mutate(across(c(L, r, AUC, K), ~ ifelse(DB == 1, NA, .x))) + df_na <- df %>% mutate(across(c(L, r, AUC, K), ~ ifelse(DB == 1, NA, .))) df_no_zeros <- df_na %>% filter(L > 0) - + + # Calculate the half-medians for `L` and `K` for rows above tolerance + L_half_median <- (median(df_above_tolerance$L, na.rm = TRUE)) / 2 + K_half_median <- (median(df_above_tolerance$K, na.rm = TRUE)) / 2 + + # Get the number of rows that are above tolerance + rows_to_remove <- nrow(df_above_tolerance) + + # Logging or handling the calculated values, e.g.: + message("Half-median for L (above tolerance): ", L_half_median) + message("Half-median for K (above tolerance): ", K_half_median) + message("Number of rows above tolerance: ", rows_to_remove) + + # Additional filtering for non-finite values in df_na df_na_filtered <- df_na %>% filter(if_any(c(L), ~ !is.finite(.))) %>% { @@ -626,15 +674,36 @@ main <- function() { df_na %>% filter(if_all(c(L), is.finite)) } - # Generate and save QC plots using the new generalized function - # message("Generating QC plots") - # variables <- c("L", "K", "r", "AUC", "delta_bg") - # generate_and_save_plots(df, out_dir_qc, "Before_QC", variables, include_qc = TRUE) - # generate_and_save_plots(df_above_tolerance, out_dir_qc, "Raw_L_vs_K_above_delta_bg_threshold", variables, include_qc = TRUE) - # generate_and_save_plots(df_na_filtered, out_dir_qc, "After_QC", variables) - # generate_and_save_plots(df_no_zeros, out_dir_qc, "No_Zeros", variables) + # Define the plot configurations for each QC step + qc_plot_configs <- list( + list(x_var = "L", y_var = "K", plot_type = "scatter", title = "Raw L vs K before QC", ylim_vals = NULL), + list(x_var = "delta_bg", y_var = NULL, plot_type = "density", title = "Density plot for Delta Background", ylim_vals = NULL), + list(x_var = "delta_bg", y_var = NULL, plot_type = "bar", title = "Bar plot for Delta Background", ylim_vals = NULL) + ) + + above_tolerance_plot_configs <- list( + list(x_var = "L", y_var = "K", plot_type = "scatter", title = "L vs K for Rows Above Delta Background Threshold", ylim_vals = NULL) + ) + + after_qc_plot_configs <- list( + list(x_var = "L", y_var = "K", plot_type = "scatter", title = "L vs K After QC Filtering", ylim_vals = NULL) + ) + + no_zeros_plot_configs <- list( + list(x_var = "L", y_var = "K", plot_type = "scatter", title = "L vs K for Non-Zero L Values", ylim_vals = NULL) + ) + + # Generate and save plots for each QC step + message("Generating QC plots") + generate_and_save_plots(df, out_dir_qc, "Before_QC", qc_plot_configs) + generate_and_save_plots(df_above_tolerance, out_dir_qc, "Above_Tolerance", above_tolerance_plot_configs) + generate_and_save_plots(df_na_filtered, out_dir_qc, "After_QC", after_qc_plot_configs) + generate_and_save_plots(df_no_zeros, out_dir_qc, "No_Zeros", no_zeros_plot_configs) + + + # Clean up + rm(df, df_above_tolerance, df_no_zeros) - rm(df, df_above_tolerance, df_no_zeros) # Calculate summary statistics message("Calculating summary statistics for all strains") @@ -790,11 +859,20 @@ main <- function() { write.csv(suppressors_lm_K, file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_K_lm.csv"), row.names = FALSE) - # Generate plots for interaction scores - # generate_rf_plots(zscores_calculations_reference, zscores_interactions_reference, out_dir) - # generate_rf_plots(zscores_calculations, zscores_interactions, out_dir) - generate_and_save_plots(zscores_calculations_reference, out_dir, "Reference_Calculations", variables) - generate_and_save_plots(zscores_calculations, out_dir, "Deletion_Calculations", variables) + # Interaction plots for reference strain + variables <- c("L", "K", "r", "AUC") + reference_plot_configs <- lapply(variables, function(var) { + interaction_plot_configs(zscores_calculations_reference, var) + }) + + generate_and_save_plots(zscores_calculations_reference, out_dir, "RF_InteractionPlots", reference_plot_configs) + + # Interaction plots for deletion strains + deletion_plot_configs <- lapply(variables, function(var) { + interaction_plot_configs(zscores_calculations, var) + }) + + generate_and_save_plots(zscores_calculations, out_dir, "InteractionPlots", deletion_plot_configs) # Apply filtering to remove NA values before further analysis zscores_interactions_filtered <- zscores_interactions %>%