From c4f398be8273db5714fd79842656dca88046401a Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Fri, 4 Oct 2024 02:30:13 -0400 Subject: [PATCH] Move correlation modeling to calculate_interction_scores --- .../apps/r/calculate_interaction_zscores.R | 381 +++++++++--------- 1 file changed, 187 insertions(+), 194 deletions(-) diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index d63d0f25..1a59096b 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -214,7 +214,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol # Calculate WT statistics from df_bg wt_stats <- df_bg %>% - filter(conc_num == 0) %>% # use the zero drug concentration background + filter(conc_num == 0) %>% summarise( WT_L = mean(mean_L, na.rm = TRUE), WT_sd_L = mean(sd_L, na.rm = TRUE), @@ -294,7 +294,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol Delta_r = if_else(NG == 1, mean_r - WT_r, Delta_r), Delta_AUC = if_else(NG == 1, mean_AUC - WT_AUC, Delta_AUC), Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L), - + # Calculate Z-scores Zscore_L = Delta_L / WT_sd_L, Zscore_K = Delta_K / WT_sd_K, @@ -302,31 +302,48 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol Zscore_AUC = Delta_AUC / WT_sd_AUC ) %>% group_modify(~ { - # Perform linear models - lm_L <- lm(Delta_L ~ conc_num_factor, data = .x) - lm_K <- lm(Delta_K ~ conc_num_factor, data = .x) - lm_r <- lm(Delta_r ~ conc_num_factor, data = .x) - lm_AUC <- lm(Delta_AUC ~ conc_num_factor, data = .x) - - .x %>% - mutate( - lm_intercept_L = coef(lm_L)[1], - lm_slope_L = coef(lm_L)[2], - R_Squared_L = summary(lm_L)$r.squared, - lm_Score_L = max_conc * lm_slope_L + lm_intercept_L, - lm_intercept_K = coef(lm_K)[1], - lm_slope_K = coef(lm_K)[2], - R_Squared_K = summary(lm_K)$r.squared, - lm_Score_K = max_conc * lm_slope_K + lm_intercept_K, - lm_intercept_r = coef(lm_r)[1], - lm_slope_r = coef(lm_r)[2], - R_Squared_r = summary(lm_r)$r.squared, - lm_Score_r = max_conc * lm_slope_r + lm_intercept_r, - lm_intercept_AUC = coef(lm_AUC)[1], - lm_slope_AUC = coef(lm_AUC)[2], - R_Squared_AUC = summary(lm_AUC)$r.squared, - lm_Score_AUC = max_conc * lm_slope_AUC + lm_intercept_AUC - ) + # Perform linear models only if there are enough unique conc_num_factor levels + if (length(unique(.x$conc_num_factor)) > 1) { + + # Filter and calculate each lm() separately with individual checks for NAs + lm_L <- if (!all(is.na(.x$Delta_L))) tryCatch(lm(Delta_L ~ conc_num_factor, data = .x), error = function(e) NULL) else NULL + lm_K <- if (!all(is.na(.x$Delta_K))) tryCatch(lm(Delta_K ~ conc_num_factor, data = .x), error = function(e) NULL) else NULL + lm_r <- if (!all(is.na(.x$Delta_r))) tryCatch(lm(Delta_r ~ conc_num_factor, data = .x), error = function(e) NULL) else NULL + lm_AUC <- if (!all(is.na(.x$Delta_AUC))) tryCatch(lm(Delta_AUC ~ conc_num_factor, data = .x), error = function(e) NULL) else NULL + + # Mutate results for each lm if it was successfully calculated, suppress warnings for perfect fits + .x %>% + mutate( + lm_intercept_L = if (!is.null(lm_L)) coef(lm_L)[1] else NA, + lm_slope_L = if (!is.null(lm_L)) coef(lm_L)[2] else NA, + R_Squared_L = if (!is.null(lm_L)) suppressWarnings(summary(lm_L)$r.squared) else NA, + lm_Score_L = if (!is.null(lm_L)) max_conc * coef(lm_L)[2] + coef(lm_L)[1] else NA, + + lm_intercept_K = if (!is.null(lm_K)) coef(lm_K)[1] else NA, + lm_slope_K = if (!is.null(lm_K)) coef(lm_K)[2] else NA, + R_Squared_K = if (!is.null(lm_K)) suppressWarnings(summary(lm_K)$r.squared) else NA, + lm_Score_K = if (!is.null(lm_K)) max_conc * coef(lm_K)[2] + coef(lm_K)[1] else NA, + + lm_intercept_r = if (!is.null(lm_r)) coef(lm_r)[1] else NA, + lm_slope_r = if (!is.null(lm_r)) coef(lm_r)[2] else NA, + R_Squared_r = if (!is.null(lm_r)) suppressWarnings(summary(lm_r)$r.squared) else NA, + lm_Score_r = if (!is.null(lm_r)) max_conc * coef(lm_r)[2] + coef(lm_r)[1] else NA, + + lm_intercept_AUC = if (!is.null(lm_AUC)) coef(lm_AUC)[1] else NA, + lm_slope_AUC = if (!is.null(lm_AUC)) coef(lm_AUC)[2] else NA, + R_Squared_AUC = if (!is.null(lm_AUC)) suppressWarnings(summary(lm_AUC)$r.squared) else NA, + lm_Score_AUC = if (!is.null(lm_AUC)) max_conc * coef(lm_AUC)[2] + coef(lm_AUC)[1] else NA + ) + } else { + # If not enough conc_num_factor levels, set lm-related values to NA + .x %>% + mutate( + lm_intercept_L = NA, lm_slope_L = NA, R_Squared_L = NA, lm_Score_L = NA, + lm_intercept_K = NA, lm_slope_K = NA, R_Squared_K = NA, lm_Score_K = NA, + lm_intercept_r = NA, lm_slope_r = NA, R_Squared_r = NA, lm_Score_r = NA, + lm_intercept_AUC = NA, lm_slope_AUC = NA, R_Squared_AUC = NA, lm_Score_AUC = NA + ) + } }) %>% ungroup() @@ -344,6 +361,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol .groups = "drop" ) + # Add lm score means and standard deviations to calculations calculations <- calculations %>% mutate( lm_mean_L = lm_means_sds$lm_mean_L, @@ -356,7 +374,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol lm_sd_AUC = lm_means_sds$lm_sd_AUC ) - # Continue with gene Z-scores and interactions + # Calculate Z-lm scores calculations <- calculations %>% mutate( Z_lm_L = (lm_Score_L - lm_mean_L) / lm_sd_L, @@ -397,7 +415,7 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol R_Squared_K = first(R_Squared_K), R_Squared_r = first(R_Squared_r), R_Squared_AUC = first(R_Squared_AUC), - + # NG, DB, SM values NG = first(NG), DB = first(DB), @@ -406,12 +424,34 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol .groups = "drop" ) + # Add overlap threshold categories based on Z-lm and Avg-Z scores + interactions <- interactions %>% + filter(!is.na(Z_lm_L)) %>% + mutate( + Overlap = case_when( + Z_lm_L >= overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Both", + Z_lm_L <= -overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Suppressor Both", + Z_lm_L >= overlap_threshold & Avg_Zscore_L <= overlap_threshold ~ "Deletion Enhancer lm only", + Z_lm_L <= overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Avg Zscore only", + Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= -overlap_threshold ~ "Deletion Suppressor lm only", + Z_lm_L >= -overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Suppressor Avg Zscore only", + Z_lm_L >= overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Enhancer lm, Deletion Suppressor Avg Zscore", + Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Zscore", + TRUE ~ "No Effect" + ), + + # For correlations + lm_R_squared_L = if (!all(is.na(Z_lm_L)) && !all(is.na(Avg_Zscore_L))) summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared else NA, + lm_R_squared_K = if (!all(is.na(Z_lm_K)) && !all(is.na(Avg_Zscore_K))) summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared else NA, + lm_R_squared_r = if (!all(is.na(Z_lm_r)) && !all(is.na(Avg_Zscore_r))) summary(lm(Z_lm_r ~ Avg_Zscore_r))$r.squared else NA, + lm_R_squared_AUC = if (!all(is.na(Z_lm_AUC)) && !all(is.na(Avg_Zscore_AUC))) summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared else NA + ) + # Creating the final calculations and interactions dataframes with only required columns for csv output calculations_df <- calculations %>% select( all_of(group_vars), - conc_num, conc_num_factor, conc_num_factor_factor, - N, NG, DB, SM, + conc_num, conc_num_factor, conc_num_factor_factor, N, mean_L, median_L, sd_L, se_L, mean_K, median_K, sd_K, se_K, mean_r, median_r, sd_r, se_r, @@ -432,23 +472,22 @@ calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshol Avg_Zscore_L, Avg_Zscore_K, Avg_Zscore_r, Avg_Zscore_AUC, Z_lm_L, Z_lm_K, Z_lm_r, Z_lm_AUC, Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC, - Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC + Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC, + lm_R_squared_L, lm_R_squared_K, lm_R_squared_r, lm_R_squared_AUC, + Overlap ) + # Join calculations and interactions to avoid dimension mismatch calculations_no_overlap <- calculations %>% - # DB, NG, SM are same as in interactions, the rest may be different and need to be checked - select(-any_of(c( - "DB", "NG", "SM", + select(-any_of(c("DB", "NG", "SM", "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC", "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC", - "Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC" - ))) + "Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC"))) - # Use left_join to avoid dimension mismatch issues full_data <- calculations_no_overlap %>% - left_join(interactions, by = group_vars) + left_join(interactions_df, by = group_vars) - # Return full_data and the two required dataframes (calculations and interactions) + # Return final dataframes return(list( calculations = calculations_df, interactions = interactions_df, @@ -781,7 +820,7 @@ generate_plate_analysis_plot_configs <- function(variables, df_before = NULL, df } generate_interaction_plot_configs <- function(df, type) { - + # Define the y-limits for the plots limits_map <- list( L = c(0, 130), @@ -793,16 +832,16 @@ generate_interaction_plot_configs <- function(df, type) { stats_plot_configs <- list() stats_boxplot_configs <- list() delta_plot_configs <- list() - - # Overall statistics plots (use df) + + # Overall statistics plots OrfRep <- first(df$OrfRep) # this should correspond to the reference strain - + for (plot_type in c("scatter", "box")) { - + for (var in names(limits_map)) { y_limits <- limits_map[[var]] y_span <- y_limits[2] - y_limits[1] - + # Common plot configuration plot_config <- list( df = df, @@ -814,7 +853,7 @@ generate_interaction_plot_configs <- function(df, type) { x_breaks = unique(df$conc_num_factor_factor), x_labels = as.character(unique(df$conc_num)) ) - + # Add specific configurations for scatter and box plots if (plot_type == "scatter") { plot_config$plot_type <- "scatter" @@ -827,96 +866,91 @@ generate_interaction_plot_configs <- function(df, type) { center_point = TRUE ) plot_config$position <- "jitter" - - # Annotation labels - annotations <- list( - list(x = 0, y = y_limits[1] + 0.1 * y_span, label = "NG ="), - list(x = 0, y = y_limits[1] + 0.05 * y_span, label = "DB ="), - list(x = 0, y = y_limits[1], label = "SM =") - ) - # Annotation values - for (x_val in unique(df$conc_num_factor_factor)) { - current_df <- df %>% filter(.data[[plot_config$x_var]] == x_val) - annotations <- append(annotations, list( - list(x = x_val, y = y_limits[1] + 0.1 * y_span, label = sum(current_df$NG, na.rm = TRUE)), - list(x = x_val, y = y_limits[1] + 0.05 * y_span, label = sum(current_df$DB, na.rm = TRUE)), - list(x = x_val, y = y_limits[1], label = sum(current_df$SM, na.rm = TRUE)) - )) - } - - plot_config$annotations <- annotations + annotations <- list( + list(x = 0.25, y = y_limits[1] + 0.1 * y_span, label = " NG ="), # Slightly above y-min + list(x = 0.25, y = y_limits[1] + 0.05 * y_span, label = " DB ="), + list(x = 0.25, y = y_limits[1], label = " SM =") + ) + + # Loop over unique x values and add NG, DB, SM values at calculated y positions + for (x_val in unique(df$conc_num_factor_factor)) { + current_df <- df %>% filter(.data[[plot_config$x_var]] == x_val) + annotations <- append(annotations, list( + list(x = x_val, y = y_limits[1] + 0.1 * y_span, label = first(current_df$NG, default = 0)), + list(x = x_val, y = y_limits[1] + 0.05 * y_span, label = first(current_df$DB, default = 0)), + list(x = x_val, y = y_limits[1], label = first(current_df$SM, default = 0)) + )) + } + + plot_config$annotations <- annotations # Append to scatter plot configurations stats_plot_configs <- append(stats_plot_configs, list(plot_config)) - + } else if (plot_type == "box") { plot_config$plot_type <- "box" plot_config$title <- sprintf("%s Boxplot RF for %s with SD", OrfRep, var) plot_config$position <- "dodge" # Boxplots don't need jitter, use dodge instead - + # Append to boxplot configurations stats_boxplot_configs <- append(stats_boxplot_configs, list(plot_config)) } } } - + + # Delta interaction plots if (type == "reference") { group_vars <- c("OrfRep", "Gene", "num") } else if (type == "deletion") { group_vars <- c("OrfRep", "Gene") } - + delta_limits_map <- list( L = c(-60, 60), K = c(-60, 60), r = c(-0.6, 0.6), AUC = c(-6000, 6000) ) - - grouped_data <- df_calculations %>% + + grouped_data <- df %>% group_by(across(all_of(group_vars))) %>% group_split() - + for (group_data in grouped_data) { OrfRep <- first(group_data$OrfRep) Gene <- first(group_data$Gene) num <- if ("num" %in% names(group_data)) first(group_data$num) else "" - + if (type == "reference") { OrfRepTitle <- paste(OrfRep, Gene, num, sep = "_") } else if (type == "deletion") { OrfRepTitle <- OrfRep } - - # Get corresponding interaction row - interaction_row <- df_interactions %>% - filter(if_all(all_of(group_vars), ~ . == first(.))) %>% - slice(1) - + for (var in names(delta_limits_map)) { y_limits <- delta_limits_map[[var]] y_span <- y_limits[2] - y_limits[1] - + # Error bars WT_sd_value <- first(group_data[[paste0("WT_sd_", var)]], default = 0) - - # Z_Shift and lm values from interaction_row - Z_Shift_value <- round(first(interaction_row[[paste0("Z_Shift_", var)]], default = 0), 2) - Z_lm_value <- round(first(interaction_row[[paste0("Z_lm_", var)]], default = 0), 2) - R_squared_value <- round(first(interaction_row[[paste0("R_Squared_", var)]], default = 0), 2) - - # NG, DB, SM values from interaction_row - NG_value <- first(interaction_row$NG, default = 0) - DB_value <- first(interaction_row$DB, default = 0) - SM_value <- first(interaction_row$SM, default = 0) - - # Use the pre-calculated lm intercept and slope from group_data + + # Z_Shift and lm values + Z_Shift_value <- round(first(group_data[[paste0("Z_Shift_", var)]], default = 0), 2) + Z_lm_value <- round(first(group_data[[paste0("Z_lm_", var)]], default = 0), 2) + R_squared_value <- round(first(group_data[[paste0("R_Squared_", var)]], default = 0), 2) + + # NG, DB, SM values + NG_value <- first(group_data$NG, default = 0) + DB_value <- first(group_data$DB, default = 0) + SM_value <- first(group_data$SM, default = 0) + + # Use the pre-calculated lm intercept and slope from the dataframe lm_intercept_col <- paste0("lm_intercept_", var) lm_slope_col <- paste0("lm_slope_", var) lm_intercept_value <- first(group_data[[lm_intercept_col]], default = 0) lm_slope_value <- first(group_data[[lm_slope_col]], default = 0) - + plot_config <- list( df = group_data, plot_type = "scatter", @@ -935,9 +969,8 @@ generate_interaction_plot_configs <- function(df, type) { ), error_bar = TRUE, error_bar_params = list( - # Passing constants directly - ymin = -2 * WT_sd_value, - ymax = 2 * WT_sd_value, + ymin = 0 - (2 * WT_sd_value), + ymax = 0 + (2 * WT_sd_value), color = "black" ), smooth = TRUE, @@ -952,45 +985,43 @@ generate_interaction_plot_configs <- function(df, type) { delta_plot_configs <- append(delta_plot_configs, list(plot_config)) } } - + # Calculate dynamic grid layout grid_ncol <- 4 num_plots <- length(delta_plot_configs) grid_nrow <- ceiling(num_plots / grid_ncol) - + return(list( list(grid_layout = list(ncol = 2, nrow = 2), plots = stats_plot_configs), list(grid_layout = list(ncol = 2, nrow = 2), plots = stats_boxplot_configs), - list(grid_layout = list(ncol = grid_ncol, nrow = grid_nrow), plots = delta_plot_configs) + list(grid_layout = list(ncol = 4, nrow = grid_nrow), plots = delta_plot_configs) )) } -generate_rank_plot_configs <- function(df_interactions, is_lm = FALSE, adjust = FALSE, overlap_color = FALSE) { - - plot_configs <- list() +generate_rank_plot_configs <- function(df, is_lm = FALSE, adjust = FALSE, overlap_color = FALSE) { sd_bands <- c(1, 2, 3) + plot_configs <- list() - # Adjust (if necessary) and rank columns variables <- c("L", "K") + + # Adjust (if necessary) and rank columns for (variable in variables) { if (adjust) { - df_interactions[[paste0("Avg_Zscore_", variable)]] <- - ifelse(is.na(df_interactions[[paste0("Avg_Zscore_", variable)]]), 0.001, df_interactions[[paste0("Avg_Zscore_", variable)]]) - df_interactions[[paste0("Z_lm_", variable)]] <- - ifelse(is.na(df_interactions[[paste0("Z_lm_", variable)]]), 0.001, df_interactions[[paste0("Z_lm_", variable)]]) + df[[paste0("Avg_Zscore_", variable)]] <- ifelse(is.na(df[[paste0("Avg_Zscore_", variable)]]), 0.001, df[[paste0("Avg_Zscore_", variable)]]) + df[[paste0("Z_lm_", variable)]] <- ifelse(is.na(df[[paste0("Z_lm_", variable)]]), 0.001, df[[paste0("Z_lm_", variable)]]) } - df_interactions[[paste0("Rank_", variable)]] <- rank(df_interactions[[paste0("Avg_Zscore_", variable)]], na.last = "keep") - df_interactions[[paste0("Rank_lm_", variable)]] <- rank(df_interactions[[paste0("Z_lm_", variable)]], na.last = "keep") + df[[paste0("Rank_", variable)]] <- rank(df[[paste0("Avg_Zscore_", variable)]], na.last = "keep") + df[[paste0("Rank_lm_", variable)]] <- rank(df[[paste0("Z_lm_", variable)]], na.last = "keep") } # Helper function to create a plot configuration create_plot_config <- function(variable, rank_var, zscore_var, y_label, sd_band, with_annotations = TRUE) { - num_enhancers <- sum(df_interactions[[zscore_var]] >= sd_band, na.rm = TRUE) - num_suppressors <- sum(df_interactions[[zscore_var]] <= -sd_band, na.rm = TRUE) - + num_enhancers <- sum(df[[zscore_var]] >= sd_band, na.rm = TRUE) + num_suppressors <- sum(df[[zscore_var]] <= -sd_band, na.rm = TRUE) + # Default plot config plot_config <- list( - df = df_interactions, + df = df, x_var = rank_var, y_var = zscore_var, plot_type = "scatter", @@ -1007,18 +1038,18 @@ generate_rank_plot_configs <- function(df_interactions, is_lm = FALSE, adjust = x_label = "Rank", legend_position = "none" ) - + if (with_annotations) { # Add specific annotations for plots with annotations plot_config$annotations <- list( list( - x = median(df_interactions[[rank_var]], na.rm = TRUE), - y = max(df_interactions[[zscore_var]], na.rm = TRUE) * 0.9, + x = median(df[[rank_var]], na.rm = TRUE), + y = max(df[[zscore_var]], na.rm = TRUE) * 0.9, label = paste("Deletion Enhancers =", num_enhancers) ), list( - x = median(df_interactions[[rank_var]], na.rm = TRUE), - y = min(df_interactions[[zscore_var]], na.rm = TRUE) * 0.9, + x = median(df[[rank_var]], na.rm = TRUE), + y = min(df[[zscore_var]], na.rm = TRUE) * 0.9, label = paste("Deletion Suppressors =", num_suppressors) ) ) @@ -1032,12 +1063,12 @@ generate_rank_plot_configs <- function(df_interactions, is_lm = FALSE, adjust = rank_var <- if (is_lm) paste0("Rank_lm_", variable) else paste0("Rank_", variable) zscore_var <- if (is_lm) paste0("Z_lm_", variable) else paste0("Avg_Zscore_", variable) y_label <- if (is_lm) paste("Int Z score", variable) else paste("Avg Z score", variable) - + # Loop through SD bands for (sd_band in sd_bands) { # Create plot with annotations plot_configs[[length(plot_configs) + 1]] <- create_plot_config(variable, rank_var, zscore_var, y_label, sd_band, with_annotations = TRUE) - + # Create plot without annotations plot_configs[[length(plot_configs) + 1]] <- create_plot_config(variable, rank_var, zscore_var, y_label, sd_band, with_annotations = FALSE) } @@ -1051,7 +1082,7 @@ generate_rank_plot_configs <- function(df_interactions, is_lm = FALSE, adjust = return(list(grid_layout = list(ncol = grid_ncol, nrow = grid_nrow), plots = plot_configs)) } -generate_correlation_plot_configs <- function(df_interactions) { +generate_correlation_plot_configs <- function(df, correlation_stats) { # Define relationships for different-variable correlations relationships <- list( list(x = "L", y = "K"), @@ -1062,23 +1093,6 @@ generate_correlation_plot_configs <- function(df_interactions) { list(x = "r", y = "AUC") ) - correlation_stats <- list() - - for (rel in relationships) { - x_var <- paste0("Z_lm_", rel$x) - y_var <- paste0("Z_lm_", rel$y) - lm_fit <- lm(df_interactions[[y_var]] ~ df_interactions[[x_var]]) - intercept <- coef(lm_fit)[1] - slope <- coef(lm_fit)[2] - r_squared <- summary(lm_fit)$r.squared - relationship_name <- paste0(rel$x, "_vs_", rel$y) - correlation_stats[[relationship_name]] <- list( - intercept = intercept, - slope = slope, - r_squared = r_squared - ) - } - plot_configs <- list() # Iterate over the option to highlight cyan points (TRUE/FALSE) @@ -1102,15 +1116,15 @@ generate_correlation_plot_configs <- function(df_interactions) { # Construct plot config plot_config <- list( - df = df_interactions, + df = df, x_var = x_var, y_var = y_var, plot_type = "scatter", title = plot_label, annotations = list( list( - x = mean(df_interactions[[x_var]], na.rm = TRUE), - y = mean(df_interactions[[y_var]], na.rm = TRUE), + x = mean(df[[x_var]], na.rm = TRUE), + y = mean(df[[y_var]], na.rm = TRUE), label = paste("R-squared =", round(r_squared, 3)) ) ), @@ -1426,39 +1440,39 @@ main <- function() { write.csv(df_calculations_reference, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE) write.csv(df_interactions_reference, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE) - message("Setting missing deletion values to the highest theoretical value at each drug conc for L") - df_deletion <- df_na_stats %>% # formerly X2 - filter(OrfRep != strain) %>% - filter(!is.na(L)) %>% - group_by(OrfRep, Gene, conc_num) %>% - mutate( - max_l_theoretical = max(max_L, na.rm = TRUE), - L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L), - SM = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, 1, SM), - L = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, max_l_theoretical, L)) %>% - ungroup() + # message("Setting missing deletion values to the highest theoretical value at each drug conc for L") + # df_deletion <- df_na_stats %>% # formerly X2 + # filter(OrfRep != strain) %>% + # filter(!is.na(L)) %>% + # group_by(OrfRep, Gene, conc_num) %>% + # mutate( + # max_l_theoretical = max(max_L, na.rm = TRUE), + # L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L), + # SM = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, 1, SM), + # L = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, max_l_theoretical, L)) %>% + # ungroup() - message("Calculating deletion strain(s) summary statistics") - df_deletion_stats <- calculate_summary_stats( - df = df_deletion, - variables = c("L", "K", "r", "AUC"), - group_vars = c("OrfRep", "Gene", "Drug", "conc_num", "conc_num_factor_factor") - )$df_with_stats - message("Calculating deletion strain(s) interactions scores") - results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "Drug")) - df_calculations <- results$calculations - df_interactions <- results$interactions - df_interactions_joined <- results$full_data - write.csv(df_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE) - write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE) + # message("Calculating deletion strain(s) summary statistics") + # df_deletion_stats <- calculate_summary_stats( + # df = df_deletion, + # variables = c("L", "K", "r", "AUC"), + # group_vars = c("OrfRep", "Gene", "Drug", "conc_num", "conc_num_factor_factor") + # )$df_with_stats + + # message("Calculating deletion strain(s) interactions scores") + # results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "Drug")) + # df_calculations <- results$calculations + # df_interactions <- results$interactions + # df_interactions_joined <- results$full_data + # write.csv(df_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE) + # write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE) - # Create interaction plots message("Generating reference interaction plots") - reference_plot_configs <- generate_interaction_plot_configs(df_interactions_reference_joined, df_bg_stats, "reference") + reference_plot_configs <- generate_interaction_plot_configs(df_interactions_reference_joined, "reference") generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs) message("Generating deletion interaction plots") - deletion_plot_configs <- generate_interaction_plot_configs(df_interactions_joined, df_bg_stats, "deletion") + deletion_plot_configs <- generate_interaction_plot_configs(df_interactions_joined, "deletion") generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs) message("Writing enhancer/suppressor csv files") @@ -1495,7 +1509,7 @@ main <- function() { message("Generating rank plots") rank_plot_configs <- generate_rank_plot_configs( - df_interactions, + df_interactions_joined, is_lm = FALSE, adjust = TRUE ) @@ -1504,37 +1518,16 @@ main <- function() { message("Generating ranked linear model plots") rank_lm_plot_configs <- generate_rank_plot_configs( - df_interactions, + df_interactions_joined, is_lm = TRUE, adjust = TRUE ) generate_and_save_plots(out_dir = out_dir, filename = "rank_plots_lm", plot_configs = rank_lm_plot_configs) - overlap_threshold <- 2 # TODO add to study config? - df_interactions_filtered <- df_interactions %>% - filter(!is.na(Z_lm_L) & !is.na(Avg_Zscore_L)) %>% - mutate( - Overlap = case_when( - Z_lm_L >= overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Both", - Z_lm_L <= -overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Suppressor Both", - Z_lm_L >= overlap_threshold & Avg_Zscore_L <= overlap_threshold ~ "Deletion Enhancer lm only", - Z_lm_L <= overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Avg Zscore only", - Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= -overlap_threshold ~ "Deletion Suppressor lm only", - Z_lm_L >= -overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Suppressor Avg Zscore only", - Z_lm_L >= overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Enhancer lm, Deletion Suppressor Avg Z score", - Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Z score", - TRUE ~ "No Effect" - ), - lm_R_squared_L = summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared, - lm_R_squared_K = summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared, - lm_R_squared_r = summary(lm(Z_lm_r ~ Avg_Zscore_r))$r.squared, - lm_R_squared_AUC = summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared - ) - message("Generating filtered ranked plots") rank_plot_filtered_configs <- generate_rank_plot_configs( - df_interactions_filtered, + df_interactions_joined, is_lm = FALSE, adjust = FALSE, overlap_color = TRUE @@ -1546,7 +1539,7 @@ main <- function() { message("Generating filtered ranked linear model plots") rank_plot_lm_filtered_configs <- generate_rank_plot_configs( - df_interactions_filtered, + df_interactions_joined, is_lm = TRUE, adjust = FALSE, overlap_color = TRUE @@ -1558,7 +1551,7 @@ main <- function() { message("Generating correlation curve parameter pair plots") correlation_plot_configs <- generate_correlation_plot_configs( - df_interactions_filtered + df_interactions_joined ) generate_and_save_plots( out_dir = out_dir,