diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index ee6a70af..83c34abd 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -184,7 +184,6 @@ update_gene_names <- function(df, sgd_gene_list) { } calculate_summary_stats <- function(df, variables, group_vars) { - summary_stats <- df %>% group_by(across(all_of(group_vars))) %>% summarise( @@ -212,12 +211,12 @@ calculate_summary_stats <- function(df, variables, group_vars) { return(list(summary_stats = summary_stats, df_with_stats = df_joined)) } -calculate_interaction_scores <- function(df, max_conc, bg_stats, - group_vars = c("OrfRep", "Gene", "num")) { - +calculate_interaction_scores <- function(df, max_conc, bg_stats, group_vars, overlap_threshold = 2) { + # Calculate total concentration variables total_conc_num <- length(unique(df$conc_num)) - + + # Initial calculations calculations <- df %>% group_by(across(all_of(group_vars))) %>% mutate( @@ -225,20 +224,24 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats, DB = sum(DB, na.rm = TRUE), SM = sum(SM, na.rm = TRUE), num_non_removed_concs = total_conc_num - sum(DB, na.rm = TRUE) - 1, - + # Calculate raw data Raw_Shift_L = first(mean_L) - bg_stats$mean_L, Raw_Shift_K = first(mean_K) - bg_stats$mean_K, Raw_Shift_r = first(mean_r) - bg_stats$mean_r, Raw_Shift_AUC = first(mean_AUC) - bg_stats$mean_AUC, - Z_Shift_L = first(Raw_Shift_L) / bg_stats$sd_L, - Z_Shift_K = first(Raw_Shift_K) / bg_stats$sd_K, - Z_Shift_r = first(Raw_Shift_r) / bg_stats$sd_r, - Z_Shift_AUC = first(Raw_Shift_AUC) / bg_stats$sd_AUC, + Z_Shift_L = Raw_Shift_L / bg_stats$sd_L, + Z_Shift_K = Raw_Shift_K / bg_stats$sd_K, + Z_Shift_r = Raw_Shift_r / bg_stats$sd_r, + Z_Shift_AUC = Raw_Shift_AUC / bg_stats$sd_AUC, + + # Expected values Exp_L = WT_L + Raw_Shift_L, Exp_K = WT_K + Raw_Shift_K, Exp_r = WT_r + Raw_Shift_r, Exp_AUC = WT_AUC + Raw_Shift_AUC, + + # Deltas Delta_L = mean_L - Exp_L, Delta_K = mean_K - Exp_K, Delta_r = mean_r - Exp_r, @@ -248,19 +251,24 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats, Delta_r = if_else(NG == 1, mean_r - WT_r, Delta_r), Delta_AUC = if_else(NG == 1, mean_AUC - WT_AUC, Delta_AUC), Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L), - + # Calculate Z-scores Zscore_L = Delta_L / WT_sd_L, Zscore_K = Delta_K / WT_sd_K, Zscore_r = Delta_r / WT_sd_r, - Zscore_AUC = Delta_AUC / WT_sd_AUC, - - # Fit linear models and store in list-columns - gene_lm_L = list(lm(Delta_L ~ conc_num_factor, data = pick(everything()))), - gene_lm_K = list(lm(Delta_K ~ conc_num_factor, data = pick(everything()))), - gene_lm_r = list(lm(Delta_r ~ conc_num_factor, data = pick(everything()))), - gene_lm_AUC = list(lm(Delta_AUC ~ conc_num_factor, data = pick(everything()))), - + Zscore_AUC = Delta_AUC / WT_sd_AUC + ) + + # Fit linear models per group + lm_results <- calculations %>% + nest() %>% + mutate( + # Fit linear models + gene_lm_L = map(data, ~ lm(Delta_L ~ conc_num_factor, data = .x)), + gene_lm_K = map(data, ~ lm(Delta_K ~ conc_num_factor, data = .x)), + gene_lm_r = map(data, ~ lm(Delta_r ~ conc_num_factor, data = .x)), + gene_lm_AUC = map(data, ~ lm(Delta_AUC ~ conc_num_factor, data = .x)), + # Extract coefficients using purrr::map_dbl lm_intercept_L = map_dbl(gene_lm_L, ~ coef(.x)[1]), lm_slope_L = map_dbl(gene_lm_L, ~ coef(.x)[2]), @@ -270,129 +278,152 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats, lm_slope_r = map_dbl(gene_lm_r, ~ coef(.x)[2]), lm_intercept_AUC = map_dbl(gene_lm_AUC, ~ coef(.x)[1]), lm_slope_AUC = map_dbl(gene_lm_AUC, ~ coef(.x)[2]), - + + # Calculate R-squared values for Delta_ models + R_Squared_L = map_dbl(gene_lm_L, ~ summary(.x)$r.squared), + R_Squared_K = map_dbl(gene_lm_K, ~ summary(.x)$r.squared), + R_Squared_r = map_dbl(gene_lm_r, ~ summary(.x)$r.squared), + R_Squared_AUC = map_dbl(gene_lm_AUC, ~ summary(.x)$r.squared), + # Calculate lm_Score_* based on coefficients lm_Score_L = max_conc * lm_slope_L + lm_intercept_L, lm_Score_K = max_conc * lm_slope_K + lm_intercept_K, lm_Score_r = max_conc * lm_slope_r + lm_intercept_r, - lm_Score_AUC = max_conc * lm_slope_AUC + lm_intercept_AUC, - - # Calculate R-squared values - R_Squared_L = map_dbl(gene_lm_L, ~ summary(.x)$r.squared), - R_Squared_K = map_dbl(gene_lm_K, ~ summary(.x)$r.squared), - R_Squared_r = map_dbl(gene_lm_r, ~ summary(.x)$r.squared), - R_Squared_AUC = map_dbl(gene_lm_AUC, ~ summary(.x)$r.squared) + lm_Score_AUC = max_conc * lm_slope_AUC + lm_intercept_AUC ) %>% + select(-data, -starts_with("gene_lm_")) %>% ungroup() - + + # Merge lm_results back into calculations + calculations <- calculations %>% + left_join(lm_results, by = group_vars) + # Calculate overall mean and SD for lm_Score_* variables - lm_means_sds <- calculations %>% + gene_lm_means <- lm_results %>% summarise( - lm_mean_L = mean(lm_Score_L, na.rm = TRUE), - lm_sd_L = sd(lm_Score_L, na.rm = TRUE), - lm_mean_K = mean(lm_Score_K, na.rm = TRUE), - lm_sd_K = sd(lm_Score_K, na.rm = TRUE), - lm_mean_r = mean(lm_Score_r, na.rm = TRUE), - lm_sd_r = sd(lm_Score_r, na.rm = TRUE), - lm_mean_AUC = mean(lm_Score_AUC, na.rm = TRUE), - lm_sd_AUC = sd(lm_Score_AUC, na.rm = TRUE) + L = mean(lm_Score_L, na.rm = TRUE), + K = mean(lm_Score_K, na.rm = TRUE), + r = mean(lm_Score_r, na.rm = TRUE), + AUC = mean(lm_Score_AUC, na.rm = TRUE) ) - + + gene_lm_sds <- lm_results %>% + summarise( + L = sd(lm_Score_L, na.rm = TRUE), + K = sd(lm_Score_K, na.rm = TRUE), + r = sd(lm_Score_r, na.rm = TRUE), + AUC = sd(lm_Score_AUC, na.rm = TRUE) + ) + + # Calculate gene Z-scores calculations <- calculations %>% mutate( - Z_lm_L = (lm_Score_L - lm_means_sds$lm_mean_L) / lm_means_sds$lm_sd_L, - Z_lm_K = (lm_Score_K - lm_means_sds$lm_mean_K) / lm_means_sds$lm_sd_K, - Z_lm_r = (lm_Score_r - lm_means_sds$lm_mean_r) / lm_means_sds$lm_sd_r, - Z_lm_AUC = (lm_Score_AUC - lm_means_sds$lm_mean_AUC) / lm_means_sds$lm_sd_AUC + Z_lm_L = (lm_Score_L - gene_lm_means$L) / gene_lm_sds$L, + Z_lm_K = (lm_Score_K - gene_lm_means$K) / gene_lm_sds$K, + Z_lm_r = (lm_Score_r - gene_lm_means$r) / gene_lm_sds$r, + Z_lm_AUC = (lm_Score_AUC - gene_lm_means$AUC) / gene_lm_sds$AUC ) - - # Summarize some of the stats + + # Build summary stats (interactions) interactions <- calculations %>% group_by(across(all_of(group_vars))) %>% - mutate( - # Calculate raw shifts + summarise( + # Calculate average Z-scores + Avg_Zscore_L = sum(Zscore_L, na.rm = TRUE) / first(num_non_removed_concs), + Avg_Zscore_K = sum(Zscore_K, na.rm = TRUE) / first(num_non_removed_concs), + Avg_Zscore_r = sum(Zscore_r, na.rm = TRUE) / first(num_non_removed_concs), + Avg_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE) / first(num_non_removed_concs), + + # Interaction Z-scores + Z_lm_L = first(Z_lm_L), + Z_lm_K = first(Z_lm_K), + Z_lm_r = first(Z_lm_r), + Z_lm_AUC = first(Z_lm_AUC), + + # Raw Shifts Raw_Shift_L = first(Raw_Shift_L), Raw_Shift_K = first(Raw_Shift_K), Raw_Shift_r = first(Raw_Shift_r), Raw_Shift_AUC = first(Raw_Shift_AUC), - - # Calculate Z-shifts + + # Z Shifts Z_Shift_L = first(Z_Shift_L), Z_Shift_K = first(Z_Shift_K), Z_Shift_r = first(Z_Shift_r), Z_Shift_AUC = first(Z_Shift_AUC), - - # Sum Z-scores - Sum_Z_Score_L = sum(Zscore_L), - Sum_Z_Score_K = sum(Zscore_K), - Sum_Z_Score_r = sum(Zscore_r), - Sum_Z_Score_AUC = sum(Zscore_AUC), - - # Calculate Average Z-scores - Avg_Zscore_L = Sum_Z_Score_L / num_non_removed_concs, - Avg_Zscore_K = Sum_Z_Score_K / num_non_removed_concs, - Avg_Zscore_r = Sum_Z_Score_r / (total_conc_num - 1), - Avg_Zscore_AUC = Sum_Z_Score_AUC / (total_conc_num - 1) + + # Include NG, DB, SM + NG = first(NG), + DB = first(DB), + SM = first(SM) ) %>% arrange(desc(Z_lm_L), desc(NG)) %>% ungroup() - - # Declare column order for output - calculations <- calculations %>% - select( - "OrfRep", "Gene", "num", "N", - "conc_num", "conc_num_factor", "conc_num_factor_factor", - "mean_L", "mean_K", "mean_r", "mean_AUC", - "median_L", "median_K", "median_r", "median_AUC", - "sd_L", "sd_K", "sd_r", "sd_AUC", - "se_L", "se_K", "se_r", "se_AUC", - "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC", - "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC", - "WT_L", "WT_K", "WT_r", "WT_AUC", - "WT_sd_L", "WT_sd_K", "WT_sd_r", "WT_sd_AUC", - "Exp_L", "Exp_K", "Exp_r", "Exp_AUC", - "Delta_L", "Delta_K", "Delta_r", "Delta_AUC", - "Zscore_L", "Zscore_K", "Zscore_r", "Zscore_AUC", - "NG", "SM", "DB" - ) - + + # Calculate overlap interactions <- interactions %>% - select( - "OrfRep", "Gene", "num", "NG", "DB", "SM", - "conc_num", "conc_num_factor", "conc_num_factor_factor", - "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC", - "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC", - "Sum_Z_Score_L", "Sum_Z_Score_K", "Sum_Z_Score_r", "Sum_Z_Score_AUC", - "Avg_Zscore_L", "Avg_Zscore_K", "Avg_Zscore_r", "Avg_Zscore_AUC", - "lm_Score_L", "lm_Score_K", "lm_Score_r", "lm_Score_AUC", - "R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC", - "Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC" + mutate( + Overlap = case_when( + Z_lm_L >= overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Both", + Z_lm_L <= -overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Suppressor Both", + Z_lm_L >= overlap_threshold & Avg_Zscore_L < overlap_threshold ~ "Deletion Enhancer lm only", + Z_lm_L < overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Avg Zscore only", + Z_lm_L <= -overlap_threshold & Avg_Zscore_L > -overlap_threshold ~ "Deletion Suppressor lm only", + Z_lm_L > -overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Suppressor Avg Zscore only", + Z_lm_L >= overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Enhancer lm, Deletion Suppressor Avg Z score", + Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Z score", + TRUE ~ "No Effect" + ) ) - - # Clean the original dataframe by removing overlapping columns - cleaned_df <- df %>% - select(-any_of( - setdiff(intersect(names(df), names(calculations)), - c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor", "conc_num_factor_factor")))) - - # Join the original dataframe with calculations - df_with_calculations <- left_join(cleaned_df, calculations, - by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor", "conc_num_factor_factor")) - - # Remove overlapping columns between df_with_calculations and interactions - df_with_calculations_clean <- df_with_calculations %>% - select(-any_of( - setdiff(intersect(names(df_with_calculations), names(interactions)), - c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor", "conc_num_factor_factor")))) - - # Join with interactions to create the full dataset - full_data <- left_join(df_with_calculations_clean, interactions, - by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor", "conc_num_factor_factor")) - + + # Fit correlation models between Z_lm_* and Avg_Zscore_* (same-variable) + correlation_lms_same <- list( + L = lm(Z_lm_L ~ Avg_Zscore_L, data = interactions), + K = lm(Z_lm_K ~ Avg_Zscore_K, data = interactions), + r = lm(Z_lm_r ~ Avg_Zscore_r, data = interactions), + AUC = lm(Z_lm_AUC ~ Avg_Zscore_AUC, data = interactions) + ) + + # Extract correlation statistics for same-variable correlations + correlation_stats_same <- map(correlation_lms_same, ~ { + list( + intercept = coef(.x)[1], + slope = coef(.x)[2], + r_squared = summary(.x)$r.squared + ) + }) + + # Fit additional correlation models between different Z_lm_* variables + correlation_lms_diff <- list( + L_vs_K = lm(Z_lm_K ~ Z_lm_L, data = interactions), + L_vs_r = lm(Z_lm_r ~ Z_lm_L, data = interactions), + L_vs_AUC = lm(Z_lm_AUC ~ Z_lm_L, data = interactions), + K_vs_r = lm(Z_lm_r ~ Z_lm_K, data = interactions), + K_vs_AUC = lm(Z_lm_AUC ~ Z_lm_K, data = interactions), + r_vs_AUC = lm(Z_lm_AUC ~ Z_lm_r, data = interactions) + ) + + # Extract correlation statistics for different-variable correlations + correlation_stats_diff <- map(correlation_lms_diff, ~ { + list( + intercept = coef(.x)[1], + slope = coef(.x)[2], + r_squared = summary(.x)$r.squared + ) + }) + + # Combine all correlation stats + correlation_stats <- c(correlation_stats_same, correlation_stats_diff) + + # Prepare full_data by merging interactions back into calculations + full_data <- calculations %>% + left_join(interactions, by = group_vars) + return(list( calculations = calculations, interactions = interactions, - full_data = full_data + full_data = full_data, + correlation_stats = correlation_stats )) } @@ -577,32 +608,11 @@ generate_scatter_plot <- function(plot, config) { # Add error bars if specified if (!is.null(config$error_bar) && config$error_bar && !is.null(config$y_var)) { if (!is.null(config$error_bar_params)) { - # Error bar params are constants, so set them outside aes - plot <- plot + - geom_errorbar( - aes( - ymin = !!sym(config$y_var), # y_var mapped to y-axis - ymax = !!sym(config$y_var) - ), - ymin = config$error_bar_params$ymin, # Constant values - ymax = config$error_bar_params$ymax, # Constant values - alpha = 0.3, - linewidth = 0.5 - ) + plot <- plot + geom_errorbar(aes(ymin = config$error_bar_params$ymin, ymax = config$error_bar_params$ymax)) } else { - # Dynamically generate ymin and ymax based on column names y_mean_col <- paste0("mean_", config$y_var) y_sd_col <- paste0("sd_", config$y_var) - - plot <- plot + - geom_errorbar( - aes( - ymin = !!sym(y_mean_col) - !!sym(y_sd_col), # Calculating ymin in aes - ymax = !!sym(y_mean_col) + !!sym(y_sd_col) # Calculating ymax in aes - ), - alpha = 0.3, - linewidth = 0.5 - ) + plot <- plot + geom_errorbar(aes(ymin = !!sym(y_mean_col) - !!sym(y_sd_col), ymax = !!sym(y_mean_col) + !!sym(y_sd_col))) } } @@ -711,7 +721,18 @@ generate_plate_analysis_plot_configs <- function(variables, df_before = NULL, df return(list(plots = plot_configs)) } -generate_interaction_plot_configs <- function(df, plot_type = "reference") { +generate_interaction_plot_configs <- function(df, type) { + + if (type == "reference") { + group_vars <- c("OrfRep", "Gene", "num") + df <- df %>% + mutate(OrfRepCombined = do.call(paste, c(across(all_of(group_vars)), sep = "_"))) + } else if (type == "deletion") { + group_vars <- c("OrfRep", "Gene") + df <- df %>% + mutate(OrfRepCombined = OrfRep) + } + limits_map <- list( L = c(0, 130), K = c(-20, 160), @@ -720,47 +741,50 @@ generate_interaction_plot_configs <- function(df, plot_type = "reference") { ) delta_limits_map <- list( - Delta_L = c(-60, 60), - Delta_K = c(-60, 60), - Delta_r = c(-0.6, 0.6), - Delta_AUC = c(-6000, 6000) + L = c(-60, 60), + K = c(-60, 60), + r = c(-0.6, 0.6), + AUC = c(-6000, 6000) ) - group_vars <- if (plot_type == "reference") c("OrfRep", "Gene", "num") else c("OrfRep", "Gene") - - df_filtered <- df %>% - mutate(OrfRepCombined = if (plot_type == "reference") paste(OrfRep, Gene, num, sep = "_") else paste(OrfRep, Gene, sep = "_")) - overall_plot_configs <- list() delta_plot_configs <- list() - # Overall plots + # Overall plots with lm_line for each interaction for (var in names(limits_map)) { y_limits <- limits_map[[var]] + + # Use the pre-calculated lm intercept and slope from the dataframe + lm_intercept_col <- paste0("lm_intercept_", var) + lm_slope_col <- paste0("lm_slope_", var) plot_config <- list( - df = df_filtered, + df = df, plot_type = "scatter", x_var = "conc_num_factor_factor", y_var = var, - x_label = unique(df_filtered$Drug)[1], + x_label = unique(df$Drug)[1], title = sprintf("Scatter RF for %s with SD", var), coord_cartesian = y_limits, error_bar = TRUE, - x_breaks = unique(df_filtered$conc_num_factor_factor), - x_labels = as.character(unique(df_filtered$conc_num)), + x_breaks = unique(df$conc_num_factor_factor), + x_labels = as.character(unique(df$conc_num)), position = "jitter", - smooth = TRUE + smooth = TRUE, + lm_line = list( + intercept = mean(df[[lm_intercept_col]], na.rm = TRUE), + slope = mean(df[[lm_slope_col]], na.rm = TRUE) + ) ) overall_plot_configs <- append(overall_plot_configs, list(plot_config)) } - # Delta plots - unique_groups <- df_filtered %>% select(all_of(group_vars)) %>% distinct() + # Delta plots (add lm_line if necessary) + unique_groups <- df %>% select(all_of(group_vars)) %>% distinct() for (i in seq_len(nrow(unique_groups))) { group <- unique_groups[i, ] - group_data <- df_filtered %>% semi_join(group, by = group_vars) + group_data <- df %>% semi_join(group, by = group_vars) OrfRep <- as.character(group$OrfRep) Gene <- if ("Gene" %in% names(group)) as.character(group$Gene) else "" @@ -770,13 +794,12 @@ generate_interaction_plot_configs <- function(df, plot_type = "reference") { y_limits <- delta_limits_map[[var]] y_span <- y_limits[2] - y_limits[1] - WT_sd_var <- paste0("WT_sd_", sub("Delta_", "", var)) - WT_sd_value <- group_data[[WT_sd_var]][1] - error_bar_ymin <- 0 - (2 * WT_sd_value) - error_bar_ymax <- 0 + (2 * WT_sd_value) + # For error bars + WT_sd_value <- group_data[[paste0("WT_sd_", var)]][1] - Z_Shift_value <- round(group_data[[paste0("Z_Shift_", sub("Delta_", "", var))]][1], 2) - Z_lm_value <- round(group_data[[paste0("Z_lm_", sub("Delta_", "", var))]][1], 2) + Z_Shift_value <- round(group_data[[paste0("Z_Shift_", var)]][1], 2) + Z_lm_value <- round(group_data[[paste0("Z_lm_", var)]][1], 2) + R_squared_value <- round(group_data[[paste0("R_squared_", var)]][1], 2) NG_value <- group_data$NG[1] DB_value <- group_data$DB[1] SM_value <- group_data$SM[1] @@ -784,37 +807,48 @@ generate_interaction_plot_configs <- function(df, plot_type = "reference") { annotations <- list( list(x = 1, y = y_limits[2] - 0.2 * y_span, label = paste("ZShift =", Z_Shift_value)), list(x = 1, y = y_limits[2] - 0.3 * y_span, label = paste("lm ZScore =", Z_lm_value)), + list(x = 1, y = y_limits[2] - 0.4 * y_span, label = paste("R-squared =", R_squared_value)), list(x = 1, y = y_limits[1] + 0.2 * y_span, label = paste("NG =", NG_value)), list(x = 1, y = y_limits[1] + 0.1 * y_span, label = paste("DB =", DB_value)), list(x = 1, y = y_limits[1], label = paste("SM =", SM_value)) ) + # Delta plot configuration with lm_line if needed plot_config <- list( df = group_data, plot_type = "scatter", x_var = "conc_num_factor_factor", y_var = var, x_label = unique(group_data$Drug)[1], - title = paste(OrfRep, Gene, sep = " "), + title = paste(OrfRepCombined, Gene, sep = " "), coord_cartesian = y_limits, annotations = annotations, error_bar = TRUE, error_bar_params = list( - ymin = error_bar_ymin, - ymax = error_bar_ymax + ymin = 0 - (2 * WT_sd_value), + ymax = 0 + (2 * WT_sd_value) ), smooth = TRUE, x_breaks = unique(group_data$conc_num_factor_factor), x_labels = as.character(unique(group_data$conc_num)), - ylim_vals = y_limits + ylim_vals = y_limits, + lm_line = list( + intercept = group_data[[lm_intercept_col]][1], + slope = group_data[[lm_slope_col]][1] + ) ) delta_plot_configs <- append(delta_plot_configs, list(plot_config)) } } + # Calculate dynamic grid layout based on the number of plots for the delta_L plots + grid_ncol <- 4 + num_plots <- length(delta_plot_configs) + grid_nrow <- ceiling(num_plots / grid_ncol) + return(list( list(grid_layout = list(ncol = 2, nrow = 2), plots = overall_plot_configs), - list(grid_layout = list(ncol = 4, nrow = 3), plots = delta_plot_configs) + list(grid_layout = list(ncol = 4, nrow = grid_nrow), plots = delta_plot_configs) )) } @@ -902,44 +936,66 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA return(list(grid_layout = list(ncol = grid_ncol, nrow = grid_nrow), plots = plot_configs)) } -generate_correlation_plot_configs <- function(df, highlight_cyan = FALSE) { +generate_correlation_plot_configs <- function(df, correlation_stats) { + # Define relationships for different-variable correlations relationships <- list( - list(x = "Z_lm_L", y = "Z_lm_K", label = "Interaction L vs. Interaction K"), - list(x = "Z_lm_L", y = "Z_lm_r", label = "Interaction L vs. Interaction r"), - list(x = "Z_lm_L", y = "Z_lm_AUC", label = "Interaction L vs. Interaction AUC"), - list(x = "Z_lm_K", y = "Z_lm_r", label = "Interaction K vs. Interaction r"), - list(x = "Z_lm_K", y = "Z_lm_AUC", label = "Interaction K vs. Interaction AUC"), - list(x = "Z_lm_r", y = "Z_lm_AUC", label = "Interaction r vs. Interaction AUC") + list(x = "L", y = "K"), + list(x = "L", y = "r"), + list(x = "L", y = "AUC"), + list(x = "K", y = "r"), + list(x = "K", y = "AUC"), + list(x = "r", y = "AUC") ) plot_configs <- list() - for (rel in relationships) { - lm_model <- lm(as.formula(paste(rel$y, "~", rel$x)), data = df) - r_squared <- summary(lm_model)$r.squared + # Iterate over the option to highlight cyan points (TRUE/FALSE) + highlight_cyan_options <- c(FALSE, TRUE) - plot_config <- list( - df = df, - x_var = rel$x, - y_var = rel$y, - plot_type = "scatter", - title = rel$label, - annotations = list( - list( - x = mean(df[[rel$x]], na.rm = TRUE), - y = mean(df[[rel$y]], na.rm = TRUE), - label = paste("R-squared =", round(r_squared, 3))) - ), - smooth = TRUE, - smooth_color = "tomato3", - lm_line = list(intercept = coef(lm_model)[1], slope = coef(lm_model)[2]), - shape = 3, - size = 0.5, - color_var = "Overlap", - cyan_points = highlight_cyan - ) + for (highlight_cyan in highlight_cyan_options) { + for (rel in relationships) { + # Extract relevant variable names for Z_lm values + x_var <- paste0("Z_lm_", rel$x) + y_var <- paste0("Z_lm_", rel$y) - plot_configs <- append(plot_configs, list(plot_config)) + # Access the correlation statistics from the correlation_stats list + relationship_name <- paste0(rel$x, "_vs_", rel$y) # Example: L_vs_K + stats <- correlation_stats[[relationship_name]] + intercept <- stats$intercept + slope <- stats$slope + r_squared <- stats$r_squared + + # Generate the label for the plot + plot_label <- paste("Interaction", rel$x, "vs.", rel$y) + + # Construct plot config + plot_config <- list( + df = df, + x_var = x_var, + y_var = y_var, + plot_type = "scatter", + title = plot_label, + annotations = list( + list( + x = mean(df[[x_var]], na.rm = TRUE), + y = mean(df[[y_var]], na.rm = TRUE), + label = paste("R-squared =", round(r_squared, 3)) + ) + ), + smooth = TRUE, + smooth_color = "tomato3", + lm_line = list( + intercept = intercept, + slope = slope + ), + shape = 3, + size = 0.5, + color_var = "Overlap", + cyan_points = highlight_cyan # Include cyan points or not based on the loop + ) + + plot_configs <- append(plot_configs, list(plot_config)) + } } return(list(plots = plot_configs)) @@ -1041,7 +1097,7 @@ main <- function() { file = file.path(out_dir, "max_observed_L_vals_for_spots_outside_2sd_K.csv"), row.names = FALSE) - # Each plots list corresponds to a file + # Each list of plots corresponds to a file l_vs_k_plot_configs <- list( plots = list( list( @@ -1147,7 +1203,7 @@ main <- function() { plot_type = "scatter", title = "Raw L vs K for strains falling outside 2SD of the K mean at each Conc", color_var = "conc_num_factor_factor", - position = "jitter", # Apply jitter for better visibility + position = "jitter", tooltip_vars = c("OrfRep", "Gene", "delta_bg"), annotations = list( list( @@ -1171,7 +1227,7 @@ main <- function() { plot_type = "scatter", title = "Delta Background vs K for strains falling outside 2SD of the K mean at each Conc", color_var = "conc_num_factor_factor", - position = "jitter", # Apply jitter for better visibility + position = "jitter", tooltip_vars = c("OrfRep", "Gene", "delta_bg"), annotations = list( list( @@ -1187,7 +1243,7 @@ main <- function() { ) message("Generating quality control plots in parallel") - # # future::plan(future::multicore, workers = parallel::detectCores()) + # future::plan(future::multicore, workers = parallel::detectCores()) future::plan(future::multisession, workers = 3) # generate 3 plots in parallel plot_configs <- list( @@ -1298,11 +1354,11 @@ main <- function() { # Create interaction plots message("Generating reference interaction plots") - reference_plot_configs <- generate_interaction_plot_configs(zscore_interactions_reference_joined, plot_type = "reference") + reference_plot_configs <- generate_interaction_plot_configs(zscore_interactions_reference_joined, "reference") generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs) message("Generating deletion interaction plots") - deletion_plot_configs <- generate_interaction_plot_configs(zscore_interactions_joined, plot_type = "deletion") + deletion_plot_configs <- generate_interaction_plot_configs(zscore_interactions_joined, "deletion") generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs) # Define conditions for enhancers and suppressors @@ -1372,29 +1428,6 @@ main <- function() { generate_and_save_plots(out_dir = out_dir, filename = "rank_plots_lm", plot_configs = rank_lm_plot_configs) - message("Filtering and reranking plots") - interaction_threshold <- 2 # TODO add to study config? - # Formerly X_NArm - zscore_interactions_filtered <- zscore_interactions_joined %>% - filter(!is.na(Z_lm_L) & !is.na(Avg_Zscore_L)) %>% - mutate( - Overlap = case_when( - Z_lm_L >= interaction_threshold & Avg_Zscore_L >= interaction_threshold ~ "Deletion Enhancer Both", - Z_lm_L <= -interaction_threshold & Avg_Zscore_L <= -interaction_threshold ~ "Deletion Suppressor Both", - Z_lm_L >= interaction_threshold & Avg_Zscore_L <= interaction_threshold ~ "Deletion Enhancer lm only", - Z_lm_L <= interaction_threshold & Avg_Zscore_L >= interaction_threshold ~ "Deletion Enhancer Avg Zscore only", - Z_lm_L <= -interaction_threshold & Avg_Zscore_L >= -interaction_threshold ~ "Deletion Suppressor lm only", - Z_lm_L >= -interaction_threshold & Avg_Zscore_L <= -interaction_threshold ~ "Deletion Suppressor Avg Zscore only", - Z_lm_L >= interaction_threshold & Avg_Zscore_L <= -interaction_threshold ~ "Deletion Enhancer lm, Deletion Suppressor Avg Z score", - Z_lm_L <= -interaction_threshold & Avg_Zscore_L >= interaction_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Z score", - TRUE ~ "No Effect" - ), - lm_R_squared_L = summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared, - lm_R_squared_K = summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared, - lm_R_squared_r = summary(lm(Z_lm_r ~ Avg_Zscore_r))$r.squared, - lm_R_squared_AUC = summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared - ) - message("Generating filtered ranked plots") rank_plot_filtered_configs <- generate_rank_plot_configs( df = zscore_interactions_filtered, @@ -1430,3 +1463,6 @@ main <- function() { }) } main() + +# For future simplification of joined dataframes +# df_joined <- left_join(cleaned_df, summary_stats, by = group_vars, suffix = c("_original", "_stats")) \ No newline at end of file