diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index bac3fe3d..5c460139 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -160,10 +160,6 @@ load_and_filter_data <- function(easy_results_file, sd = 3) { conc_num_factor_factor = as.factor(conc_num) ) - # Set the max concentration across the whole dataframe - df <- df %>% - mutate(max_conc = max(df$conc_num_factor, na.rm = TRUE)) - return(df) } @@ -215,8 +211,10 @@ calculate_summary_stats <- function(df, variables, group_vars) { return(list(summary_stats = summary_stats, df_with_stats = df_joined)) } -calculate_interaction_scores <- function(df, df_bg, group_vars, max_conc, overlap_threshold = 2) { +calculate_interaction_scores <- function(df, df_bg, group_vars) { + max_conc <- max(as.numeric(df$conc_num_factor), na.rm = TRUE) + # Include background statistics per concentration bg_stats <- df_bg %>% group_by(conc_num, conc_num_factor) %>% @@ -231,14 +229,14 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, max_conc, overla WT_sd_AUC = first(sd_AUC), .groups = "drop" ) - + # Calculate total number of concentrations total_conc_num <- length(unique(df$conc_num)) - + # Join background statistics to df calculations <- df %>% left_join(bg_stats, by = c("conc_num", "conc_num_factor")) - + # Perform calculations calculations <- calculations %>% group_by(across(all_of(group_vars))) %>% @@ -248,38 +246,38 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, max_conc, overla DB = sum(DB, na.rm = TRUE), SM = sum(SM, na.rm = TRUE), num_non_removed_concs = n_distinct(conc_num[DB != 1]), - + # Raw shifts Raw_Shift_L = mean_L - WT_L, Raw_Shift_K = mean_K - WT_K, Raw_Shift_r = mean_r - WT_r, Raw_Shift_AUC = mean_AUC - WT_AUC, - + # Z shifts Z_Shift_L = Raw_Shift_L / WT_sd_L, Z_Shift_K = Raw_Shift_K / WT_sd_K, Z_Shift_r = Raw_Shift_r / WT_sd_r, Z_Shift_AUC = Raw_Shift_AUC / WT_sd_AUC, - + # Expected values Exp_L = WT_L + Raw_Shift_L, Exp_K = WT_K + Raw_Shift_K, Exp_r = WT_r + Raw_Shift_r, Exp_AUC = WT_AUC + Raw_Shift_AUC, - + # Deltas Delta_L = mean_L - Exp_L, Delta_K = mean_K - Exp_K, Delta_r = mean_r - Exp_r, Delta_AUC = mean_AUC - Exp_AUC, - + # Adjust Deltas for NG and SM Delta_L = if_else(NG == 1, mean_L - WT_L, Delta_L), Delta_K = if_else(NG == 1, mean_K - WT_K, Delta_K), Delta_r = if_else(NG == 1, mean_r - WT_r, Delta_r), Delta_AUC = if_else(NG == 1, mean_AUC - WT_AUC, Delta_AUC), Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L), - + # Z-scores Zscore_L = Delta_L / WT_sd_L, Zscore_K = Delta_K / WT_sd_K, @@ -287,7 +285,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, max_conc, overla Zscore_AUC = Delta_AUC / WT_sd_AUC ) %>% ungroup() - + # Fit linear models within each group calculations <- calculations %>% group_by(across(all_of(group_vars))) %>% @@ -321,7 +319,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, max_conc, overla return(data) }) %>% ungroup() - + # Compute lm means and sds across all data without grouping lm_means_sds <- calculations %>% summarise( @@ -334,7 +332,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, max_conc, overla lm_mean_AUC = mean(lm_Score_AUC, na.rm = TRUE), lm_sd_AUC = sd(lm_Score_AUC, na.rm = TRUE) ) - + # Apply global lm means and sds to calculate Z_lm_* calculations <- calculations %>% mutate( @@ -343,7 +341,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, max_conc, overla Z_lm_r = (lm_Score_r - lm_means_sds$lm_mean_r) / lm_means_sds$lm_sd_r, Z_lm_AUC = (lm_Score_AUC - lm_means_sds$lm_mean_AUC) / lm_means_sds$lm_sd_AUC ) - + # Build interactions data frame interactions <- calculations %>% group_by(across(all_of(group_vars))) %>% @@ -352,75 +350,46 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, max_conc, overla Avg_Zscore_K = mean(Zscore_K, na.rm = TRUE), Avg_Zscore_r = mean(Zscore_r, na.rm = TRUE), Avg_Zscore_AUC = mean(Zscore_AUC, na.rm = TRUE), - + # Interaction Z-scores Z_lm_L = first(Z_lm_L), Z_lm_K = first(Z_lm_K), Z_lm_r = first(Z_lm_r), Z_lm_AUC = first(Z_lm_AUC), - + # Raw Shifts Raw_Shift_L = first(Raw_Shift_L), Raw_Shift_K = first(Raw_Shift_K), Raw_Shift_r = first(Raw_Shift_r), Raw_Shift_AUC = first(Raw_Shift_AUC), - + # Z Shifts Z_Shift_L = first(Z_Shift_L), Z_Shift_K = first(Z_Shift_K), Z_Shift_r = first(Z_Shift_r), Z_Shift_AUC = first(Z_Shift_AUC), - + # NG, DB, SM values NG = first(NG), DB = first(DB), SM = first(SM), - + # R Squared values R_Squared_L = first(R_Squared_L), R_Squared_K = first(R_Squared_K), R_Squared_r = first(R_Squared_r), R_Squared_AUC = first(R_Squared_AUC), - + + # Include Drug + Drug = first(Drug), + .groups = "drop" ) - - # Create the final calculations and interactions dataframes with required columns - calculations_df <- calculations %>% - select( - all_of(group_vars), - conc_num, conc_num_factor, conc_num_factor_factor, - N, NG, DB, SM, - mean_L, mean_K, mean_r, mean_AUC, - Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC, - Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC, - WT_L, WT_K, WT_r, WT_AUC, - WT_sd_L, WT_sd_K, WT_sd_r, WT_sd_AUC, - Exp_L, Exp_K, Exp_r, Exp_AUC, - Delta_L, Delta_K, Delta_r, Delta_AUC, - Zscore_L, Zscore_K, Zscore_r, Zscore_AUC - ) - - interactions_df <- interactions %>% - select( - all_of(group_vars), - NG, DB, SM, - Avg_Zscore_L, Avg_Zscore_K, Avg_Zscore_r, Avg_Zscore_AUC, - Z_lm_L, Z_lm_K, Z_lm_r, Z_lm_AUC, - Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC, - Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC, - R_Squared_L, R_Squared_K, R_Squared_r, R_Squared_AUC - ) - - # Create full_data by joining calculations_df and interactions_df - full_data <- calculations_df %>% - left_join(interactions_df, by = group_vars, suffix = c("", "_interaction")) - - # Return the dataframes + + # Return the dataframes without creating full_data return(list( - calculations = calculations_df, - interactions = interactions_df, - full_data = full_data + calculations = calculations, + interactions = interactions )) } @@ -491,40 +460,29 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs) { "red" } - y_mean_prefix <- if (!is.null(config$error_bar_params$y_mean_prefix)) { - config$error_bar_params$y_mean_prefix - } else { - "mean_" - } - y_mean_col <- paste0(y_mean_prefix, config$y_var) - - # Dynamically set y_sd_col based on the provided prefix in error_bar_params - y_sd_prefix <- if (!is.null(config$error_bar_params$y_sd_prefix)) { - config$error_bar_params$y_sd_prefix - } else { - "sd_" - } - y_sd_col <- paste0(y_sd_prefix, config$y_var) - - if (!is.null(config$error_bar_params$center_point)) { - plot <- plot + geom_point(aes( - x = .data[[config$x_var]], - y = first(.data[[y_mean_col]])), - color = error_bar_color, - shape = 16) - } - - # Use error_bar_params if provided, otherwise calculate from mean and sd if (!is.null(config$error_bar_params$ymin) && !is.null(config$error_bar_params$ymax)) { - plot <- plot + geom_errorbar(aes( - ymin = config$error_bar_params$ymin, - ymax = config$error_bar_params$ymax), - color = error_bar_color) + # Check if ymin and ymax are constants or column names + if (is.numeric(config$error_bar_params$ymin) && is.numeric(config$error_bar_params$ymax)) { + plot <- plot + geom_errorbar(aes(x = .data[[config$x_var]]), + ymin = config$error_bar_params$ymin, + ymax = config$error_bar_params$ymax, + color = error_bar_color) + } else { + plot <- plot + geom_errorbar(aes( + x = .data[[config$x_var]], + ymin = .data[[config$error_bar_params$ymin]], + ymax = .data[[config$error_bar_params$ymax]] + ), color = error_bar_color) + } } else { + # Original code for calculating from mean and sd + y_mean_col <- paste0("mean_", config$y_var) + y_sd_col <- paste0("sd_", config$y_var) plot <- plot + geom_errorbar(aes( - ymin = first(.data[[y_mean_col]]) - first(.data[[y_sd_col]]), - ymax = first(.data[[y_mean_col]]) + first(.data[[y_sd_col]])), - color = error_bar_color) + x = .data[[config$x_var]], + ymin = .data[[y_mean_col]] - .data[[y_sd_col]], + ymax = .data[[y_mean_col]] + .data[[y_sd_col]] + ), color = error_bar_color) } } @@ -756,8 +714,8 @@ generate_plate_analysis_plot_configs <- function(variables, df_before = NULL, df return(list(plots = plot_configs)) } -generate_interaction_plot_configs <- function(df, type) { - +generate_interaction_plot_configs <- function(df, df_calculations, df_interactions, type) { + # Define the y-limits for the plots limits_map <- list( L = c(0, 130), @@ -769,16 +727,16 @@ generate_interaction_plot_configs <- function(df, type) { stats_plot_configs <- list() stats_boxplot_configs <- list() delta_plot_configs <- list() - - # Overall statistics plots + + # Overall statistics plots (use df) OrfRep <- first(df$OrfRep) # this should correspond to the reference strain - + for (plot_type in c("scatter", "box")) { - + for (var in names(limits_map)) { y_limits <- limits_map[[var]] y_span <- y_limits[2] - y_limits[1] - + # Common plot configuration plot_config <- list( df = df, @@ -790,7 +748,7 @@ generate_interaction_plot_configs <- function(df, type) { x_breaks = unique(df$conc_num_factor_factor), x_labels = as.character(unique(df$conc_num)) ) - + # Add specific configurations for scatter and box plots if (plot_type == "scatter") { plot_config$plot_type <- "scatter" @@ -803,91 +761,96 @@ generate_interaction_plot_configs <- function(df, type) { center_point = TRUE ) plot_config$position <- "jitter" - - annotations <- list( - list(x = 0.25, y = y_limits[1] + 0.1 * y_span, label = "NG ="), # Slightly above y-min - list(x = 0.25, y = y_limits[1] + 0.05 * y_span, label = "DB ="), - list(x = 0.25, y = y_limits[1], label = "SM =") - ) - - # Loop over unique x values and add NG, DB, SM values at calculated y positions - for (x_val in unique(df$conc_num_factor_factor)) { - current_df <- df %>% filter(.data[[plot_config$x_var]] == x_val) - annotations <- append(annotations, list( - list(x = x_val, y = y_limits[1] + 0.1 * y_span, label = first(current_df$NG, default = 0)), - list(x = x_val, y = y_limits[1] + 0.05 * y_span, label = first(current_df$DB, default = 0)), - list(x = x_val, y = y_limits[1], label = first(current_df$SM, default = 0)) - )) - } - - plot_config$annotations <- annotations + + annotations <- list( + list(x = 0.25, y = y_limits[1] + 0.1 * y_span, label = "NG ="), # Slightly above y-min + list(x = 0.25, y = y_limits[1] + 0.05 * y_span, label = "DB ="), + list(x = 0.25, y = y_limits[1], label = "SM =") + ) + + # Loop over unique x values and add NG, DB, SM values at calculated y positions + for (x_val in unique(df$conc_num_factor_factor)) { + current_df <- df %>% filter(.data[[plot_config$x_var]] == x_val) + annotations <- append(annotations, list( + list(x = x_val, y = y_limits[1] + 0.1 * y_span, label = sum(current_df$NG, na.rm = TRUE)), + list(x = x_val, y = y_limits[1] + 0.05 * y_span, label = sum(current_df$DB, na.rm = TRUE)), + list(x = x_val, y = y_limits[1], label = sum(current_df$SM, na.rm = TRUE)) + )) + } + + plot_config$annotations <- annotations # Append to scatter plot configurations stats_plot_configs <- append(stats_plot_configs, list(plot_config)) - + } else if (plot_type == "box") { plot_config$plot_type <- "box" plot_config$title <- sprintf("%s Boxplot RF for %s with SD", OrfRep, var) plot_config$position <- "dodge" # Boxplots don't need jitter, use dodge instead - + # Append to boxplot configurations stats_boxplot_configs <- append(stats_boxplot_configs, list(plot_config)) } } } - - # Delta interaction plots + + # Delta interaction plots (use df_calculations and df_interactions) if (type == "reference") { group_vars <- c("OrfRep", "Gene", "num") } else if (type == "deletion") { group_vars <- c("OrfRep", "Gene") } - + delta_limits_map <- list( L = c(-60, 60), K = c(-60, 60), r = c(-0.6, 0.6), AUC = c(-6000, 6000) ) - - grouped_data <- df %>% + + grouped_data <- df_calculations %>% group_by(across(all_of(group_vars))) %>% group_split() - + for (group_data in grouped_data) { OrfRep <- first(group_data$OrfRep) Gene <- first(group_data$Gene) num <- if ("num" %in% names(group_data)) first(group_data$num) else "" - + if (type == "reference") { OrfRepTitle <- paste(OrfRep, Gene, num, sep = "_") } else if (type == "deletion") { OrfRepTitle <- OrfRep } - + + # Get corresponding interaction row + interaction_row <- df_interactions %>% + filter(if_all(all_of(group_vars), ~ . == first(.))) %>% + slice(1) + for (var in names(delta_limits_map)) { y_limits <- delta_limits_map[[var]] y_span <- y_limits[2] - y_limits[1] - + # Error bars WT_sd_value <- first(group_data[[paste0("WT_sd_", var)]], default = 0) - - # Z_Shift and lm values - Z_Shift_value <- round(first(group_data[[paste0("Z_Shift_", var)]], default = 0), 2) - Z_lm_value <- round(first(group_data[[paste0("Z_lm_", var)]], default = 0), 2) - R_squared_value <- round(first(group_data[[paste0("R_Squared_", var)]], default = 0), 2) - - # NG, DB, SM values - NG_value <- first(group_data$NG, default = 0) - DB_value <- first(group_data$DB, default = 0) - SM_value <- first(group_data$SM, default = 0) - - # Use the pre-calculated lm intercept and slope from the dataframe + + # Z_Shift and lm values from interaction_row + Z_Shift_value <- round(first(interaction_row[[paste0("Z_Shift_", var)]], default = 0), 2) + Z_lm_value <- round(first(interaction_row[[paste0("Z_lm_", var)]], default = 0), 2) + R_squared_value <- round(first(interaction_row[[paste0("R_Squared_", var)]], default = 0), 2) + + # NG, DB, SM values from interaction_row + NG_value <- first(interaction_row$NG, default = 0) + DB_value <- first(interaction_row$DB, default = 0) + SM_value <- first(interaction_row$SM, default = 0) + + # Use the pre-calculated lm intercept and slope from group_data lm_intercept_col <- paste0("lm_intercept_", var) lm_slope_col <- paste0("lm_slope_", var) lm_intercept_value <- first(group_data[[lm_intercept_col]], default = 0) lm_slope_value <- first(group_data[[lm_slope_col]], default = 0) - + plot_config <- list( df = group_data, plot_type = "scatter", @@ -906,8 +869,9 @@ generate_interaction_plot_configs <- function(df, type) { ), error_bar = TRUE, error_bar_params = list( - ymin = 0 - (2 * WT_sd_value), - ymax = 0 + (2 * WT_sd_value), + # Passing constants directly + ymin = -2 * WT_sd_value, + ymax = 2 * WT_sd_value, color = "black" ), smooth = TRUE, @@ -922,43 +886,45 @@ generate_interaction_plot_configs <- function(df, type) { delta_plot_configs <- append(delta_plot_configs, list(plot_config)) } } - + # Calculate dynamic grid layout grid_ncol <- 4 num_plots <- length(delta_plot_configs) grid_nrow <- ceiling(num_plots / grid_ncol) - + return(list( list(grid_layout = list(ncol = 2, nrow = 2), plots = stats_plot_configs), list(grid_layout = list(ncol = 2, nrow = 2), plots = stats_boxplot_configs), - list(grid_layout = list(ncol = 4, nrow = grid_nrow), plots = delta_plot_configs) + list(grid_layout = list(ncol = grid_ncol, nrow = grid_nrow), plots = delta_plot_configs) )) } -generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FALSE, overlap_color = FALSE) { - sd_bands <- c(1, 2, 3) - plot_configs <- list() +generate_rank_plot_configs <- function(df_interactions, is_lm = FALSE, adjust = FALSE, overlap_color = FALSE) { + + plot_configs <- list() + sd_bands <- c(1, 2, 3) - variables <- c("L", "K") - # Adjust (if necessary) and rank columns + variables <- c("L", "K") for (variable in variables) { if (adjust) { - df[[paste0("Avg_Zscore_", variable)]] <- ifelse(is.na(df[[paste0("Avg_Zscore_", variable)]]), 0.001, df[[paste0("Avg_Zscore_", variable)]]) - df[[paste0("Z_lm_", variable)]] <- ifelse(is.na(df[[paste0("Z_lm_", variable)]]), 0.001, df[[paste0("Z_lm_", variable)]]) + df_interactions[[paste0("Avg_Zscore_", variable)]] <- + ifelse(is.na(df_interactions[[paste0("Avg_Zscore_", variable)]]), 0.001, df_interactions[[paste0("Avg_Zscore_", variable)]]) + df_interactions[[paste0("Z_lm_", variable)]] <- + ifelse(is.na(df_interactions[[paste0("Z_lm_", variable)]]), 0.001, df_interactions[[paste0("Z_lm_", variable)]]) } - df[[paste0("Rank_", variable)]] <- rank(df[[paste0("Avg_Zscore_", variable)]], na.last = "keep") - df[[paste0("Rank_lm_", variable)]] <- rank(df[[paste0("Z_lm_", variable)]], na.last = "keep") + df_interactions[[paste0("Rank_", variable)]] <- rank(df_interactions[[paste0("Avg_Zscore_", variable)]], na.last = "keep") + df_interactions[[paste0("Rank_lm_", variable)]] <- rank(df_interactions[[paste0("Z_lm_", variable)]], na.last = "keep") } # Helper function to create a plot configuration create_plot_config <- function(variable, rank_var, zscore_var, y_label, sd_band, with_annotations = TRUE) { - num_enhancers <- sum(df[[zscore_var]] >= sd_band, na.rm = TRUE) - num_suppressors <- sum(df[[zscore_var]] <= -sd_band, na.rm = TRUE) - + num_enhancers <- sum(df_interactions[[zscore_var]] >= sd_band, na.rm = TRUE) + num_suppressors <- sum(df_interactions[[zscore_var]] <= -sd_band, na.rm = TRUE) + # Default plot config plot_config <- list( - df = df, + df = df_interactions, x_var = rank_var, y_var = zscore_var, plot_type = "scatter", @@ -975,18 +941,18 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA x_label = "Rank", legend_position = "none" ) - + if (with_annotations) { # Add specific annotations for plots with annotations plot_config$annotations <- list( list( - x = median(df[[rank_var]], na.rm = TRUE), - y = max(df[[zscore_var]], na.rm = TRUE) * 0.9, + x = median(df_interactions[[rank_var]], na.rm = TRUE), + y = max(df_interactions[[zscore_var]], na.rm = TRUE) * 0.9, label = paste("Deletion Enhancers =", num_enhancers) ), list( - x = median(df[[rank_var]], na.rm = TRUE), - y = min(df[[zscore_var]], na.rm = TRUE) * 0.9, + x = median(df_interactions[[rank_var]], na.rm = TRUE), + y = min(df_interactions[[zscore_var]], na.rm = TRUE) * 0.9, label = paste("Deletion Suppressors =", num_suppressors) ) ) @@ -1000,12 +966,12 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA rank_var <- if (is_lm) paste0("Rank_lm_", variable) else paste0("Rank_", variable) zscore_var <- if (is_lm) paste0("Z_lm_", variable) else paste0("Avg_Zscore_", variable) y_label <- if (is_lm) paste("Int Z score", variable) else paste("Avg Z score", variable) - + # Loop through SD bands for (sd_band in sd_bands) { # Create plot with annotations plot_configs[[length(plot_configs) + 1]] <- create_plot_config(variable, rank_var, zscore_var, y_label, sd_band, with_annotations = TRUE) - + # Create plot without annotations plot_configs[[length(plot_configs) + 1]] <- create_plot_config(variable, rank_var, zscore_var, y_label, sd_band, with_annotations = FALSE) } @@ -1019,7 +985,7 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA return(list(grid_layout = list(ncol = grid_ncol, nrow = grid_nrow), plots = plot_configs)) } -generate_correlation_plot_configs <- function(df, correlation_stats) { +generate_correlation_plot_configs <- function(df_interactions) { # Define relationships for different-variable correlations relationships <- list( list(x = "L", y = "K"), @@ -1030,6 +996,23 @@ generate_correlation_plot_configs <- function(df, correlation_stats) { list(x = "r", y = "AUC") ) + correlation_stats <- list() + + for (rel in relationships) { + x_var <- paste0("Z_lm_", rel$x) + y_var <- paste0("Z_lm_", rel$y) + lm_fit <- lm(df_interactions[[y_var]] ~ df_interactions[[x_var]]) + intercept <- coef(lm_fit)[1] + slope <- coef(lm_fit)[2] + r_squared <- summary(lm_fit)$r.squared + relationship_name <- paste0(rel$x, "_vs_", rel$y) + correlation_stats[[relationship_name]] <- list( + intercept = intercept, + slope = slope, + r_squared = r_squared + ) + } + plot_configs <- list() # Iterate over the option to highlight cyan points (TRUE/FALSE) @@ -1053,15 +1036,15 @@ generate_correlation_plot_configs <- function(df, correlation_stats) { # Construct plot config plot_config <- list( - df = df, + df = df_interactions, x_var = x_var, y_var = y_var, plot_type = "scatter", title = plot_label, annotations = list( list( - x = mean(df[[x_var]], na.rm = TRUE), - y = mean(df[[y_var]], na.rm = TRUE), + x = mean(df_interactions[[x_var]], na.rm = TRUE), + y = mean(df_interactions[[y_var]], na.rm = TRUE), label = paste("R-squared =", round(r_squared, 3)) ) ), @@ -1371,9 +1354,8 @@ main <- function() { group_vars = c("OrfRep", "Gene", "num", "conc_num") )$df_with_stats reference_results <- calculate_interaction_scores(df_reference_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "num")) - zscore_calculations_reference <- reference_results$calculations - zscore_interactions_reference <- reference_results$interactions - zscore_interactions_reference_joined <- reference_results$full_data + df_calculations_reference <- reference_results$calculations + df_interactions_reference <- reference_results$interactions message("Setting missing deletion values to the highest theoretical value at each drug conc for L") df_deletion <- df_na_stats %>% # formerly X2 @@ -1394,38 +1376,39 @@ main <- function() { group_vars = c("OrfRep", "Gene", "conc_num") )$df_with_stats deletion_results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, group_vars = c("OrfRep", "Gene")) - zscore_calculations <- deletion_results$calculations - zscore_interactions <- deletion_results$interactions - zscore_interactions_joined <- deletion_results$full_data + df_calculations <- deletion_results$calculations + df_interactions <- deletion_results$interactions # Writing Z-Scores to file - write.csv(zscore_calculations_reference, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE) - write.csv(zscore_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE) - write.csv(zscore_interactions_reference, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE) - write.csv(zscore_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE) + write.csv(df_calculations_reference, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE) + write.csv(df_interactions_reference, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE) + write.csv(df_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE) + write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE) # Create interaction plots message("Generating reference interaction plots") - reference_plot_configs <- generate_interaction_plot_configs(zscore_interactions_reference_joined, "reference") + reference_plot_configs <- generate_interaction_plot_configs( + df_reference_stats, df_calculations_reference, df_interactions_reference, "reference") generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs) message("Generating deletion interaction plots") - deletion_plot_configs <- generate_interaction_plot_configs(zscore_interactions_joined, "deletion") + deletion_plot_configs <- generate_interaction_plot_configs( + df_deletion_stats, df_calculations, df_interactions, "deletion") generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs) # Define conditions for enhancers and suppressors # TODO Add to study config? threshold <- 2 - enhancer_condition_L <- zscore_interactions$Avg_Zscore_L >= threshold - suppressor_condition_L <- zscore_interactions$Avg_Zscore_L <= -threshold - enhancer_condition_K <- zscore_interactions$Avg_Zscore_K >= threshold - suppressor_condition_K <- zscore_interactions$Avg_Zscore_K <= -threshold + enhancer_condition_L <- df_interactions$Avg_Zscore_L >= threshold + suppressor_condition_L <- df_interactions$Avg_Zscore_L <= -threshold + enhancer_condition_K <- df_interactions$Avg_Zscore_K >= threshold + suppressor_condition_K <- df_interactions$Avg_Zscore_K <= -threshold # Subset data - enhancers_L <- zscore_interactions[enhancer_condition_L, ] - suppressors_L <- zscore_interactions[suppressor_condition_L, ] - enhancers_K <- zscore_interactions[enhancer_condition_K, ] - suppressors_K <- zscore_interactions[suppressor_condition_K, ] + enhancers_L <- df_interactions[enhancer_condition_L, ] + suppressors_L <- df_interactions[suppressor_condition_L, ] + enhancers_K <- df_interactions[enhancer_condition_K, ] + suppressors_K <- df_interactions[suppressor_condition_K, ] # Save enhancers and suppressors message("Writing enhancer/suppressor csv files") @@ -1435,8 +1418,8 @@ main <- function() { write.csv(suppressors_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_K.csv"), row.names = FALSE) # Combine conditions for enhancers and suppressors - enhancers_and_suppressors_L <- zscore_interactions[enhancer_condition_L | suppressor_condition_L, ] - enhancers_and_suppressors_K <- zscore_interactions[enhancer_condition_K | suppressor_condition_K, ] + enhancers_and_suppressors_L <- df_interactions[enhancer_condition_L | suppressor_condition_L, ] + enhancers_and_suppressors_K <- df_interactions[enhancer_condition_K | suppressor_condition_K, ] # Save combined enhancers and suppressors write.csv(enhancers_and_suppressors_L, @@ -1446,10 +1429,10 @@ main <- function() { # Handle linear model based enhancers and suppressors lm_threshold <- 2 # TODO add to study config? - enhancers_lm_L <- zscore_interactions[zscore_interactions$Z_lm_L >= lm_threshold, ] - suppressors_lm_L <- zscore_interactions[zscore_interactions$Z_lm_L <= -lm_threshold, ] - enhancers_lm_K <- zscore_interactions[zscore_interactions$Z_lm_K >= lm_threshold, ] - suppressors_lm_K <- zscore_interactions[zscore_interactions$Z_lm_K <= -lm_threshold, ] + enhancers_lm_L <- df_interactions[df_interactions$Z_lm_L >= lm_threshold, ] + suppressors_lm_L <- df_interactions[df_interactions$Z_lm_L <= -lm_threshold, ] + enhancers_lm_K <- df_interactions[df_interactions$Z_lm_K >= lm_threshold, ] + suppressors_lm_K <- df_interactions[df_interactions$Z_lm_K <= -lm_threshold, ] # Save linear model based enhancers and suppressors message("Writing linear model enhancer/suppressor csv files") @@ -1464,7 +1447,7 @@ main <- function() { message("Generating rank plots") rank_plot_configs <- generate_rank_plot_configs( - df = zscore_interactions_joined, + df_interactions, is_lm = FALSE, adjust = TRUE ) @@ -1473,16 +1456,37 @@ main <- function() { message("Generating ranked linear model plots") rank_lm_plot_configs <- generate_rank_plot_configs( - df = zscore_interactions_joined, + df_interactions, is_lm = TRUE, adjust = TRUE ) generate_and_save_plots(out_dir = out_dir, filename = "rank_plots_lm", plot_configs = rank_lm_plot_configs) + overlap_threshold <- 2 + df_interactions_filtered <- df_interactions %>% + filter(!is.na(Z_lm_L) & !is.na(Avg_Zscore_L)) %>% + mutate( + Overlap = case_when( + Z_lm_L >= overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Both", + Z_lm_L <= -overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Suppressor Both", + Z_lm_L >= overlap_threshold & Avg_Zscore_L <= overlap_threshold ~ "Deletion Enhancer lm only", + Z_lm_L <= overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Avg Zscore only", + Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= -overlap_threshold ~ "Deletion Suppressor lm only", + Z_lm_L >= -overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Suppressor Avg Zscore only", + Z_lm_L >= overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Enhancer lm, Deletion Suppressor Avg Z score", + Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Z score", + TRUE ~ "No Effect" + ), + lm_R_squared_L = summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared, + lm_R_squared_K = summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared, + lm_R_squared_r = summary(lm(Z_lm_r ~ Avg_Zscore_r))$r.squared, + lm_R_squared_AUC = summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared + ) + message("Generating filtered ranked plots") rank_plot_filtered_configs <- generate_rank_plot_configs( - df = zscore_interactions_filtered, + df_interactions_filtered, is_lm = FALSE, adjust = FALSE, overlap_color = TRUE @@ -1494,7 +1498,7 @@ main <- function() { message("Generating filtered ranked linear model plots") rank_plot_lm_filtered_configs <- generate_rank_plot_configs( - df = zscore_interactions_filtered, + df_interactions_filtered, is_lm = TRUE, adjust = FALSE, overlap_color = TRUE @@ -1505,7 +1509,9 @@ main <- function() { plot_configs = rank_plot_lm_filtered_configs) message("Generating correlation curve parameter pair plots") - correlation_plot_configs <- generate_correlation_plot_configs(zscore_interactions_filtered) + correlation_plot_configs <- generate_correlation_plot_configs( + df_interactions_filtered + ) generate_and_save_plots( out_dir = out_dir, filename = "correlation_cpps",