From d456bcdaddc8c90b77f2df67e6efbf3622eee05a Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Tue, 8 Oct 2024 02:16:35 -0400 Subject: [PATCH] Refactor calculations to try and grouping issues --- .../apps/r/calculate_interaction_zscores.R | 589 +++++++++--------- 1 file changed, 278 insertions(+), 311 deletions(-) diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 21321304..aa9ad262 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -177,7 +177,7 @@ update_gene_names <- function(df, sgd_gene_list) { } calculate_summary_stats <- function(df, variables, group_vars) { - summary_stats <- df %>% + ss <- df %>% group_by(across(all_of(group_vars))) %>% summarise( N = n(), @@ -192,16 +192,19 @@ calculate_summary_stats <- function(df, variables, group_vars) { ), .names = "{.fn}_{.col}" ), + sum_NG = sum(NG, na.rm = TRUE), + sum_DB = sum(DB, na.rm = TRUE), + sum_SM = sum(SM, na.rm = TRUE), .groups = "drop" ) # Create a cleaned version of df that doesn't overlap with summary_stats df_cleaned <- df %>% - select(-any_of(setdiff(intersect(names(df), names(summary_stats)), group_vars))) + select(-any_of(setdiff(intersect(names(df), names(ss)), group_vars))) - df_joined <- left_join(df_cleaned, summary_stats, by = group_vars) + ss_joined <- left_join(df_cleaned, ss, by = group_vars) - return(list(summary_stats = summary_stats, df_with_stats = df_joined)) + return(list(stats_csv = ss, stats_joined = ss_joined)) } calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2) { @@ -217,11 +220,12 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2) group_vars <- c("OrfRep", "Gene", "Drug") } - perform_lm <- function(x, y, max_conc) { - if (all(is.na(x)) || all(is.na(y)) || length(x[!is.na(x)]) == 0 || length(y[!is.na(y)]) == 0) { + perform_lm <- function(response, predictor, max_conc) { + if (all(is.na(response)) || all(is.na(predictor)) || + length(response[!is.na(response)]) == 0 || length(predictor[!is.na(predictor)]) == 0) { return(list(intercept = NA, slope = NA, r_squared = NA, score = NA)) } else { - fit <- lm(y ~ x) + fit <- lm(response ~ predictor) return(list( intercept = coef(fit)[1], slope = coef(fit)[2], @@ -247,98 +251,69 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2) ) # Join WT statistics to df - df <- df %>% - left_join(wt_stats, by = c(bg_group_vars)) + df <- df %>% left_join(wt_stats, by = bg_group_vars) # Compute mean values at zero concentration - mean_zeroes <- df %>% - filter(conc_num == 0) %>% + df_mean_zeros <- df %>% group_by(across(all_of(group_vars))) %>% - summarise( + filter(conc_num == 0) %>% + summarize( mean_L_zero = mean(mean_L, na.rm = TRUE), mean_K_zero = mean(mean_K, na.rm = TRUE), mean_r_zero = mean(mean_r, na.rm = TRUE), mean_AUC_zero = mean(mean_AUC, na.rm = TRUE), - .groups = "drop" + .groups = "drop" # Ungroup after summarizing ) - df <- df %>% - left_join(mean_zeroes, by = c(group_vars)) - - # Calculate Raw Shifts and Z Shifts for all rows - df <- df %>% - mutate( - Raw_Shift_L = mean_L_zero - WT_L, - Raw_Shift_K = mean_K_zero - WT_K, - Raw_Shift_r = mean_r_zero - WT_r, - Raw_Shift_AUC = mean_AUC_zero - WT_AUC, - Z_Shift_L = Raw_Shift_L / WT_sd_L, - Z_Shift_K = Raw_Shift_K / WT_sd_K, - Z_Shift_r = Raw_Shift_r / WT_sd_r, - Z_Shift_AUC = Raw_Shift_AUC / WT_sd_AUC - ) - + df <- df %>% left_join(df_mean_zeros, by = group_vars) + + # Calculate per-row data calculations <- df %>% - group_by(across(all_of(c(group_vars, "conc_num", "conc_num_factor", "conc_num_factor_factor")))) %>% - mutate( - NG_sum = sum(NG, na.rm = TRUE), - DB_sum = sum(DB, na.rm = TRUE), - SM_sum = sum(SM, na.rm = TRUE), - - # Expected values - Exp_L = WT_L + Raw_Shift_L, - Exp_K = WT_K + Raw_Shift_K, - Exp_r = WT_r + Raw_Shift_r, - Exp_AUC = WT_AUC + Raw_Shift_AUC, - - # Deltas - Delta_L = mean_L - Exp_L, - Delta_K = mean_K - Exp_K, - Delta_r = mean_r - Exp_r, - Delta_AUC = mean_AUC - Exp_AUC, - - # Adjust deltas for NG and SM - Delta_L = if_else(NG == 1, mean_L - WT_L, Delta_L), - Delta_K = if_else(NG == 1, mean_K - WT_K, Delta_K), - Delta_r = if_else(NG == 1, mean_r - WT_r, Delta_r), - Delta_AUC = if_else(NG == 1, mean_AUC - WT_AUC, Delta_AUC), - Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L), - - # Calculate Z-scores - Zscore_L = Delta_L / WT_sd_L, - Zscore_K = Delta_K / WT_sd_K, - Zscore_r = Delta_r / WT_sd_r, - Zscore_AUC = Delta_AUC / WT_sd_AUC - ) %>% - ungroup() - - # For interaction plot error bars - delta_means_sds <- calculations %>% group_by(across(all_of(group_vars))) %>% - summarise( - mean_Delta_L = mean(Delta_L, na.rm = TRUE), - mean_Delta_K = mean(Delta_K, na.rm = TRUE), - mean_Delta_r = mean(Delta_r, na.rm = TRUE), - mean_Delta_AUC = mean(Delta_AUC, na.rm = TRUE), - sd_Delta_L = sd(Delta_L, na.rm = TRUE), - sd_Delta_K = sd(Delta_K, na.rm = TRUE), - sd_Delta_r = sd(Delta_r, na.rm = TRUE), - sd_Delta_AUC = sd(Delta_AUC, na.rm = TRUE), - .groups = "drop" - ) - - calculations <- calculations %>% - left_join(delta_means_sds, by = group_vars) + mutate( + Raw_Shift_L = mean_L_zero - WT_L, + Raw_Shift_K = mean_K_zero - WT_K, + Raw_Shift_r = mean_r_zero - WT_r, + Raw_Shift_AUC = mean_AUC_zero - WT_AUC, + Z_Shift_L = Raw_Shift_L / WT_sd_L, + Z_Shift_K = Raw_Shift_K / WT_sd_K, + Z_Shift_r = Raw_Shift_r / WT_sd_r, + Z_Shift_AUC = Raw_Shift_AUC / WT_sd_AUC, + # Expected values + Exp_L = WT_L + Raw_Shift_L, + Exp_K = WT_K + Raw_Shift_K, + Exp_r = WT_r + Raw_Shift_r, + Exp_AUC = WT_AUC + Raw_Shift_AUC, + # Deltas + Delta_L = mean_L - Exp_L, + Delta_K = mean_K - Exp_K, + Delta_r = mean_r - Exp_r, + Delta_AUC = mean_AUC - Exp_AUC, + # Adjust deltas for NG and SM + Delta_L = if_else(NG == 1, mean_L - WT_L, Delta_L), + Delta_K = if_else(NG == 1, mean_K - WT_K, Delta_K), + Delta_r = if_else(NG == 1, mean_r - WT_r, Delta_r), + Delta_AUC = if_else(NG == 1, mean_AUC - WT_AUC, Delta_AUC), + Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L), + # Calculate Z-scores + Zscore_L = Delta_L / WT_sd_L, + Zscore_K = Delta_K / WT_sd_K, + Zscore_r = Delta_r / WT_sd_r, + Zscore_AUC = Delta_AUC / WT_sd_AUC + ) # Calculate group-specific interactions interactions <- calculations %>% group_by(across(all_of(group_vars))) %>% summarise( - NG_sum_int = sum(NG), - DB_sum_int = sum(DB), - SM_sum_int = sum(SM), - num_non_removed_concs = total_conc_num - sum(DB, na.rm = TRUE) - 1, + num_non_removed_concs = total_conc_num - sum(DB) - 1, + # Count QA columns, this will clobber the originals in left join but + # we're not using them after this point so it's cleaner this way + NG = sum(NG, na.rm = TRUE), + DB = sum(DB, na.rm = TRUE), + SM = sum(SM, na.rm = TRUE), + # Add background data Raw_Shift_L = first(Raw_Shift_L), Raw_Shift_K = first(Raw_Shift_K), @@ -356,7 +331,7 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2) Sum_Z_Score_AUC = sum(Zscore_AUC, na.rm = TRUE), # We sum twice but it saves on creating another block - # TODO should we use mean() here, not sure + # TODO should we use mean() here, original logic needs explanation Avg_Zscore_L = sum(Zscore_L, na.rm = TRUE) / first(num_non_removed_concs), Avg_Zscore_K = sum(Zscore_K, na.rm = TRUE) / first(num_non_removed_concs), Avg_Zscore_r = sum(Zscore_r, na.rm = TRUE) / first(total_conc_num - 1), @@ -392,7 +367,7 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2) # Summary statistics for lm scores interactions <- interactions %>% - # group_by(across(all_of(group_vars))) %>% + group_by(across(all_of(group_vars))) %>% mutate( lm_mean_L = mean(lm_Score_L, na.rm = TRUE), lm_sd_L = sd(lm_Score_L, na.rm = TRUE), @@ -407,46 +382,42 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2) Z_lm_r = (lm_Score_r - lm_mean_r) / lm_sd_r, Z_lm_AUC = (lm_Score_AUC - lm_mean_AUC) / lm_sd_AUC ) %>% - arrange(desc(Z_lm_L), desc(NG_sum_int)) + arrange(desc(Z_lm_L), desc(NG)) # Deletion data ranking and linear modeling + # Initialize this variable so we can return it as NULL for reference + # We don't do the following calculations for the reference strain + interactions_narm <- NULL # formerly X_NArm + if (type == "deletion") { interactions <- interactions %>% mutate( - Avg_Zscore_L_adjusted = ifelse(is.na(Avg_Zscore_L), 0.001, Avg_Zscore_L), - Avg_Zscore_K_adjusted = ifelse(is.na(Avg_Zscore_K), 0.001, Avg_Zscore_K), - Avg_Zscore_r_adjusted = ifelse(is.na(Avg_Zscore_r), 0.001, Avg_Zscore_r), - Avg_Zscore_AUC_adjusted = ifelse(is.na(Avg_Zscore_AUC), 0.001, Avg_Zscore_AUC), - Z_lm_L_adjusted = ifelse(is.na(Z_lm_L), 0.001, Z_lm_L), - Z_lm_K_adjusted = ifelse(is.na(Z_lm_K), 0.001, Z_lm_K), - Z_lm_r_adjusted = ifelse(is.na(Z_lm_r), 0.001, Z_lm_r), - Z_lm_AUC_adjusted = ifelse(is.na(Z_lm_AUC), 0.001, Z_lm_AUC), - Rank_L = rank(Avg_Zscore_L_adjusted), - Rank_K = rank(Avg_Zscore_K_adjusted), - Rank_r = rank(Avg_Zscore_r_adjusted), - Rank_AUC = rank(Avg_Zscore_AUC_adjusted), - Rank_lm_L = rank(Z_lm_L_adjusted), - Rank_lm_K = rank(Z_lm_K_adjusted), - Rank_lm_r = rank(Z_lm_r_adjusted), - Rank_lm_AUC = rank(Z_lm_AUC_adjusted), - Rank_lm_L = list(perform_lm(Rank_lm_L, Rank_L, max_conc)), - Rank_lm_K = list(perform_lm(Rank_lm_K, Rank_K, max_conc)), - Rank_lm_r = list(perform_lm(Rank_lm_r, Rank_r, max_conc)), - Rank_lm_AUC = list(perform_lm(Rank_lm_AUC, Rank_AUC, max_conc)), - Correlation_lm_L = list(perform_lm(Z_lm_L, Avg_Zscore_L, max_conc)), - Correlation_lm_K = list(perform_lm(Z_lm_K, Avg_Zscore_K, max_conc)), - Correlation_lm_r = list(perform_lm(Z_lm_r, Avg_Zscore_r, max_conc)), - Correlation_lm_AUC = list(perform_lm(Z_lm_AUC, Avg_Zscore_AUC, max_conc)), - Correlation_lm_K_L = list(perform_lm(Z_lm_K, Z_lm_L, max_conc)), - Correlation_lm_r_L = list(perform_lm(Z_lm_r, Z_lm_L, max_conc)), - Correlation_lm_AUC_L = list(perform_lm(Z_lm_AUC, Z_lm_L, max_conc)), - Correlation_lm_r_K = list(perform_lm(Z_lm_r, Z_lm_K, max_conc)), - Correlation_lm_AUC_K = list(perform_lm(Z_lm_AUC, Z_lm_K, max_conc)), - Correlation_lm_AUC_r = list(perform_lm(Z_lm_AUC, Z_lm_r, max_conc)) + # Set NA values to a small value to include them in ranking + Avg_Zscore_adjusted_L = ifelse(is.na(Avg_Zscore_L), 0.001, Avg_Zscore_L), + Avg_Zscore_adjusted_K = ifelse(is.na(Avg_Zscore_K), 0.001, Avg_Zscore_K), + Avg_Zscore_adjusted_r = ifelse(is.na(Avg_Zscore_r), 0.001, Avg_Zscore_r), + Avg_Zscore_adjusted_AUC = ifelse(is.na(Avg_Zscore_AUC), 0.001, Avg_Zscore_AUC), + Z_lm_adjusted_L = ifelse(is.na(Z_lm_L), 0.001, Z_lm_L), + Z_lm_adjusted_K = ifelse(is.na(Z_lm_K), 0.001, Z_lm_K), + Z_lm_adjusted_r = ifelse(is.na(Z_lm_r), 0.001, Z_lm_r), + Z_lm_adjusted_AUC = ifelse(is.na(Z_lm_AUC), 0.001, Z_lm_AUC), + Rank_L = rank(Avg_Zscore_adjusted_L), + Rank_K = rank(Avg_Zscore_adjusted_K), + Rank_r = rank(Avg_Zscore_adjusted_r), + Rank_AUC = rank(Avg_Zscore_adjusted_AUC), + Rank_lm_L = rank(Z_lm_adjusted_L), + Rank_lm_K = rank(Z_lm_adjusted_K), + Rank_lm_r = rank(Z_lm_adjusted_r), + Rank_lm_AUC = rank(Z_lm_adjusted_AUC), + Rank_lm_lm_L = list(perform_lm(Rank_lm_L, Rank_L, max_conc)), + Rank_lm_lm_K = list(perform_lm(Rank_lm_K, Rank_K, max_conc)), + Rank_lm_lm_r = list(perform_lm(Rank_lm_r, Rank_r, max_conc)), + Rank_lm_lm_AUC = list(perform_lm(Rank_lm_AUC, Rank_AUC, max_conc)), ) - # Add overlap threshold categories based on Z-lm and Avg-Z scores - interactions <- interactions %>% + # We are filtering invalid Z_lm scores so this must be in its own df + # A left join will just reintroduce NAs by coercion + interactions_narm <- interactions %>% filter(!is.na(Z_lm_L) | !is.na(Avg_Zscore_L)) %>% mutate( Overlap = case_when( @@ -460,11 +431,32 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2) Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Zscore", TRUE ~ "No Effect" ), - - Rank_lm_R_squared_L = Rank_lm_L[[1]]$r_squared, - Rank_lm_R_squared_K = Rank_lm_L[[1]]$r_squared, - Rank_lm_R_squared_r = Rank_lm_r[[1]]$r_squared, - Rank_lm_R_squared_AUC = Rank_lm_AUC[[1]]$r_squared, + Rank_L = rank(Avg_Zscore_L), + Rank_K = rank(Avg_Zscore_K), + Rank_r = rank(Avg_Zscore_r), + Rank_AUC = rank(Avg_Zscore_AUC), + Rank_lm_L = rank(Z_lm_L), + Rank_lm_K = rank(Z_lm_K), + Rank_lm_r = rank(Z_lm_r), + Rank_lm_AUC = rank(Z_lm_AUC), + Rank_lm_lm_L = list(perform_lm(Rank_lm_L, Rank_L, max_conc)), + Rank_lm_lm_K = list(perform_lm(Rank_lm_K, Rank_K, max_conc)), + Rank_lm_lm_r = list(perform_lm(Rank_lm_r, Rank_r, max_conc)), + Rank_lm_lm_AUC = list(perform_lm(Rank_lm_AUC, Rank_AUC, max_conc)), + # Rank_lm_R_Squared_L = Rank_lm_lm_L[[1]]$r_squared, + # Rank_lm_R_Squared_K = Rank_lm_lm_K[[1]]$r_squared, + # Rank_lm_R_Squared_r = Rank_lm_lm_r[[1]]$r_squared, + # Rank_lm_R_Squared_AUC = Rank_lm_lm_AUC[[1]]$r_squared, + Correlation_lm_L = list(perform_lm(Z_lm_L, Avg_Zscore_L, max_conc)), + Correlation_lm_K = list(perform_lm(Z_lm_K, Avg_Zscore_K, max_conc)), + Correlation_lm_r = list(perform_lm(Z_lm_r, Avg_Zscore_r, max_conc)), + Correlation_lm_AUC = list(perform_lm(Z_lm_AUC, Avg_Zscore_AUC, max_conc)), + Correlation_lm_K_L = list(perform_lm(Z_lm_K, Z_lm_L, max_conc)), + Correlation_lm_r_L = list(perform_lm(Z_lm_r, Z_lm_L, max_conc)), + Correlation_lm_AUC_L = list(perform_lm(Z_lm_AUC, Z_lm_L, max_conc)), + Correlation_lm_r_K = list(perform_lm(Z_lm_r, Z_lm_K, max_conc)), + Correlation_lm_AUC_K = list(perform_lm(Z_lm_AUC, Z_lm_K, max_conc)), + Correlation_lm_AUC_r = list(perform_lm(Z_lm_AUC, Z_lm_r, max_conc)), Correlation_lm_intercept_L = Correlation_lm_L[[1]]$intercept, Correlation_lm_slope_L = Correlation_lm_L[[1]]$slope, Correlation_lm_R_Squared_L = Correlation_lm_L[[1]]$r_squared, @@ -483,97 +475,77 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2) Correlation_lm_Score_AUC = Correlation_lm_AUC[[1]]$score, Correlation_lm_intercept_K_L = Correlation_lm_K_L[[1]]$intercept, Correlation_lm_slope_K_L = Correlation_lm_K_L[[1]]$slope, - Correlation_lm_R_squared_K_L = Correlation_lm_K_L[[1]]$r_squared, + Correlation_lm_R_Squared_K_L = Correlation_lm_K_L[[1]]$r_squared, Correlation_lm_Score_K_L = Correlation_lm_K_L[[1]]$score, Correlation_lm_intercept_r_L = Correlation_lm_r_L[[1]]$intercept, Correlation_lm_slope_r_L = Correlation_lm_r_L[[1]]$slope, - Correlation_lm_R_squared_r_L = Correlation_lm_r_L[[1]]$r_squared, + Correlation_lm_R_Squared_r_L = Correlation_lm_r_L[[1]]$r_squared, Correlation_lm_Score_r_L = Correlation_lm_r_L[[1]]$score, Correlation_lm_intercept_AUC_L = Correlation_lm_AUC_L[[1]]$intercept, Correlation_lm_slope_AUC_L = Correlation_lm_AUC_L[[1]]$slope, - Correlation_lm_R_squared_AUC_L = Correlation_lm_AUC_L[[1]]$r_squared, + Correlation_lm_R_Squared_AUC_L = Correlation_lm_AUC_L[[1]]$r_squared, Correlation_lm_Score_AUC_L = Correlation_lm_AUC_L[[1]]$score, Correlation_lm_intercept_r_K = Correlation_lm_r_K[[1]]$intercept, Correlation_lm_slope_r_K = Correlation_lm_r_K[[1]]$slope, - Correlation_lm_R_squared_r_K = Correlation_lm_r_K[[1]]$r_squared, + Correlation_lm_R_Squared_r_K = Correlation_lm_r_K[[1]]$r_squared, Correlation_lm_Score_r_K = Correlation_lm_r_K[[1]]$score, Correlation_lm_intercept_AUC_K = Correlation_lm_AUC_K[[1]]$intercept, Correlation_lm_slope_AUC_K = Correlation_lm_AUC_K[[1]]$slope, - Correlation_lm_R_squared_AUC_K = Correlation_lm_AUC_K[[1]]$r_squared, + Correlation_lm_R_Squared_AUC_K = Correlation_lm_AUC_K[[1]]$r_squared, Correlation_lm_Score_AUC_K = Correlation_lm_AUC_K[[1]]$score, Correlation_lm_intercept_AUC_r = Correlation_lm_AUC_r[[1]]$intercept, Correlation_lm_slope_AUC_r = Correlation_lm_AUC_r[[1]]$slope, - Correlation_lm_R_squared_AUC_r = Correlation_lm_AUC_r[[1]]$r_squared, + Correlation_lm_R_Squared_AUC_r = Correlation_lm_AUC_r[[1]]$r_squared, Correlation_lm_Score_AUC_r = Correlation_lm_AUC_r[[1]]$score ) } # Create the final calculations and interactions dataframes with specific columns for csv output # Trying to mimic original output data - df_calculations <- calculations %>% - select(all_of(c( - group_vars, # necessary for full_data left_join - "conc_num", "conc_num_factor", "conc_num_factor_factor", "N", - "mean_L", "median_L", "sd_L", "se_L", - "mean_K", "median_K", "sd_K", "se_K", - "mean_r", "median_r", "sd_r", "se_r", - "mean_AUC", "median_AUC", "sd_AUC", "se_AUC", - "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC", - "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC", - "WT_L", "WT_K", "WT_r", "WT_AUC", - "WT_sd_L", "WT_sd_K", "WT_sd_r", "WT_sd_AUC", - "Exp_L", "Exp_K", "Exp_r", "Exp_AUC", - "Delta_L", "Delta_K", "Delta_r", "Delta_AUC", - "mean_Delta_L", "mean_Delta_K", "mean_Delta_r", "mean_Delta_AUC", - "Zscore_L", "Zscore_K", "Zscore_r", "Zscore_AUC", - "NG_sum", "DB_sum", "SM_sum" - ))) %>% - rename(NG = NG_sum, DB = DB_sum, SM = SM_sum) + calculations_csv <- calculations %>% + select( + all_of(group_vars), + conc_num, conc_num_factor, conc_num_factor_factor, + N, NG, DB, SM, + mean_L, median_L, sd_L, se_L, + mean_K, median_K, sd_K, se_K, + mean_r, median_r, sd_r, se_r, + mean_AUC, median_AUC, sd_AUC, se_AUC, + Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC, + Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC, + WT_L, WT_K, WT_r, WT_AUC, + WT_sd_L, WT_sd_K, WT_sd_r, WT_sd_AUC, + Exp_L, Exp_K, Exp_r, Exp_AUC, + Delta_L, Delta_K, Delta_r, Delta_AUC, + Zscore_L, Zscore_K, Zscore_r, Zscore_AUC + ) - df_interactions <- interactions %>% - select(any_of(c( - group_vars, # necessary for full_data left_join - "Avg_Zscore_L", "Avg_Zscore_K", "Avg_Zscore_r", "Avg_Zscore_AUC", - "Sum_Z_Score_L", "Sum_Z_Score_K", "Sum_Z_Score_r", "Sum_Z_Score_AUC", - "Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC", - "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC", - "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC", - "lm_Score_L", "lm_Score_K", "lm_Score_r", "lm_Score_AUC", - "R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC", - "NG_sum_int", "DB_sum_int", "SM_sum_int", - "Rank_lm_R_squared_L", "Rank_lm_R_squared_K", "Rank_lm_R_squared_r", "Rank_lm_R_squared_AUC", - "Correlation_lm_intercept_L", "Correlation_lm_slope_L", "Correlation_lm_R_squared_L", "Correlation_lm_Score_L", - "Correlation_lm_intercept_K", "Correlation_lm_slope_K", "Correlation_lm_R_squared_K", "Correlation_lm_Score_K", - "Correlation_lm_intercept_r", "Correlation_lm_slope_r", "Correlation_lm_R_squared_r", "Correlation_lm_Score_r", - "Correlation_lm_intercept_AUC", "Correlation_lm_slope_AUC", "Correlation_lm_R_squared_AUC", "Correlation_lm_Score_AUC", - "Correlation_lm_intercept_K_L", "Correlation_lm_slope_K_L", "Correlation_lm_R_squared_K_L", "Correlation_lm_Score_K_L", - "Correlation_lm_intercept_r_L", "Correlation_lm_slope_r_L", "Correlation_lm_R_squared_r_L", "Correlation_lm_Score_r_L", - "Correlation_lm_intercept_AUC_L", "Correlation_lm_slope_AUC_L", "Correlation_lm_R_squared_AUC_L", "Correlation_lm_Score_AUC_L", - "Correlation_lm_intercept_r_K", "Correlation_lm_slope_r_K", "Correlation_lm_R_squared_r_K", "Correlation_lm_Score_r_K", - "Correlation_lm_intercept_AUC_K", "Correlation_lm_slope_AUC_K", "Correlation_lm_R_squared_AUC_K", "Correlation_lm_Score_AUC_K", - "Correlation_lm_intercept_AUC_r", "Correlation_lm_slope_AUC_r", "Correlation_lm_R_squared_AUC_r", "Correlation_lm_Score_AUC_r", - "Overlap" - ))) %>% - rename(NG = NG_sum_int, DB = DB_sum_int, SM = SM_sum_int) + interactions_csv <- interactions %>% + select( + all_of(group_vars), + NG, DB, SM, + Avg_Zscore_L, Avg_Zscore_K, Avg_Zscore_r, Avg_Zscore_AUC, + Z_lm_L, Z_lm_K, Z_lm_r, Z_lm_AUC, + Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC, + Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC, + Sum_Z_Score_L, Sum_Z_Score_K, Sum_Z_Score_r, Sum_Z_Score_AUC, + lm_Score_L, lm_Score_K, lm_Score_r, lm_Score_AUC, + R_Squared_L, R_Squared_K, R_Squared_r, R_Squared_AUC + ) - # Avoid column collision on left join for overlapping variables - calculations_no_overlap <- calculations %>% - select(-any_of(c("DB", "NG", "SM", - # Don't need these anywhere so easier to remove - "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC", - "R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC", - # we need these for the interactions but the original code has the same names in both datasets - "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC" - ))) - - full_data <- calculations_no_overlap %>% - left_join(interactions, by = group_vars) + # Filter the calculations dataframe so we don't clobber on left join + calculations_selected <- calculations %>% + select(all_of(c(group_vars, setdiff(names(calculations), names(interactions))))) + + interactions_joined <- left_join(calculations_selected, interactions, by = group_vars) # Return final dataframes return(list( - calculations = df_calculations, - interactions = df_interactions, - full_data = full_data + calculations_csv = calculations_csv, + interactions_csv = interactions_csv, + interactions = interactions, + interactions_joined = interactions_joined, + interactions_narm = interactions_narm )) } @@ -1068,10 +1040,10 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type) } for (var in names(delta_limits_map)) { + y_var_name <- paste0("Delta_", var) y_limits <- delta_limits_map[[var]] y_span <- y_limits[2] - y_limits[1] - y_var_name <- paste0("Delta_", var) - + # Anti-filter to select out-of-bounds rows out_of_bounds <- group_data %>% filter(is.na(!!sym(y_var_name)) | @@ -1095,19 +1067,17 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type) next # skip plot if insufficient data is available } - WT_sd_value <- first(group_data_filtered[[paste0("WT_sd_", var)]], default = 0) Z_Shift_value <- round(first(group_data_filtered[[paste0("Z_Shift_", var)]], default = 0), 2) Z_lm_value <- round(first(group_data_filtered[[paste0("Z_lm_", var)]], default = 0), 2) - R_squared_value <- round(first(group_data_filtered[[paste0("R_Squared_", var)]], default = 0), 2) - NG_value <- first(group_data_filtered$NG, default = 0) - DB_value <- first(group_data_filtered$DB, default = 0) - SM_value <- first(group_data_filtered$SM, default = 0) + NG <- first(group_data_filtered$NG) + DB <- first(group_data_filtered$DB) + SM <- first(group_data_filtered$SM) lm_intercept_col <- paste0("lm_intercept_", var) lm_slope_col <- paste0("lm_slope_", var) - lm_intercept <- first(group_data_filtered[[lm_intercept_col]], default = 0) - lm_slope <- first(group_data_filtered[[lm_slope_col]], default = 0) + lm_intercept <- first(group_data_filtered[[lm_intercept_col]]) + lm_slope <- first(group_data_filtered[[lm_slope_col]]) plot_config <- list( df = group_data_filtered, @@ -1122,10 +1092,9 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type) annotations = list( list(x = 1, y = y_limits[2] - 0.1 * y_span, label = paste(" ZShift =", round(Z_Shift_value, 2))), list(x = 1, y = y_limits[2] - 0.2 * y_span, label = paste(" lm ZScore =", round(Z_lm_value, 2))), - # list(x = 1, y = y_limits[2] - 0.3 * y_span, label = paste(" R-squared =", round(R_squared_value, 2))), - list(x = 1, y = y_limits[1] + 0.1 * y_span, label = paste("NG =", NG_value)), - list(x = 1, y = y_limits[1] + 0.05 * y_span, label = paste("DB =", DB_value)), - list(x = 1, y = y_limits[1], label = paste("SM =", SM_value)) + list(x = 1, y = y_limits[1] + 0.1 * y_span, label = paste("NG =", NG)), + list(x = 1, y = y_limits[1] + 0.05 * y_span, label = paste("DB =", DB)), + list(x = 1, y = y_limits[1], label = paste("SM =", SM)) ), error_bar = TRUE, error_bar_params = list( @@ -1267,7 +1236,7 @@ generate_correlation_plot_configs <- function(df, df_reference) { intercept <- df[[paste0("Correlation_lm_intercept_", rel$y, "_", rel$x)]][1] slope <- df[[paste0("Correlation_lm_slope_", rel$y, "_", rel$x)]][1] - r_squared <- df[[paste0("Correlation_lm_R_squared_", rel$y, "_", rel$x)]][1] + r_squared <- df[[paste0("Correlation_lm_R_Squared_", rel$y, "_", rel$x)]][1] r_squared_rounded <- round(r_squared, 4) r_squared_label <- paste("R-squared =", r_squared_rounded) xmin <- min(c(min(df[[x_var]]), min(df_reference[[x_var]]))) @@ -1355,7 +1324,7 @@ main <- function() { df_stats <- calculate_summary_stats( # formerly X_stats_ALL df = df, variables = c("L", "K", "r", "AUC", "delta_bg"), - group_vars = c("conc_num", "conc_num_factor_factor"))$df_with_stats + group_vars = c("conc_num", "conc_num_factor_factor"))$stats_joined frequency_delta_bg_plot_configs <- list( plots = list( @@ -1422,11 +1391,10 @@ main <- function() { df = df_na, variables = c("L", "K", "r", "AUC", "delta_bg"), group_vars = c("conc_num", "conc_num_factor_factor")) - df_na_ss <- ss$summary_stats - df_na_stats <- ss$df_with_stats # formerly X_stats_ALL - write.csv(df_na_ss, file = file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE) + stats_na <- ss$stats_joined # formerly X_stats_ALL + write.csv(ss$stats_csv, file = file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE) # This can help bypass missing values ggplot warnings during testing - df_na_stats_filtered <- df_na_stats %>% filter(if_all(all_of(c("L", "K", "r", "AUC", "delta_bg")), is.finite)) + stats_na_filtered <- stats_na %>% filter(if_all(all_of(c("L", "K", "r", "AUC", "delta_bg")), is.finite)) message("Calculating summary statistics excluding zero values") df_no_zeros <- df_na %>% filter(L > 0) # formerly X_noZero @@ -1434,40 +1402,40 @@ main <- function() { df = df_no_zeros, variables = c("L", "K", "r", "AUC", "delta_bg"), group_vars = c("conc_num", "conc_num_factor_factor") - )$df_with_stats + )$stats_joined message("Filtering by 2SD of K") - df_na_within_2sd_k <- df_na_stats %>% + df_na_within_2sd_k <- stats_na %>% filter(K >= (mean_K - 2 * sd_K) & K <= (mean_K + 2 * sd_K)) - df_na_outside_2sd_k <- df_na_stats %>% + df_na_outside_2sd_k <- stats_na %>% filter(K < (mean_K - 2 * sd_K) | K > (mean_K + 2 * sd_K)) message("Calculating summary statistics for L within 2SD of K") # TODO We're omitting the original z_max calculation, not sure if needed? ss <- calculate_summary_stats(df_na_within_2sd_k, "L", # formerly X_stats_BY_L_within_2SD_K - group_vars = c("conc_num", "conc_num_factor_factor"))$summary_stats - write.csv(ss, + group_vars = c("conc_num", "conc_num_factor_factor")) + write.csv(ss$stats_csv, file = file.path(out_dir_qc, "max_observed_L_vals_for_spots_within_2SD_K.csv"), row.names = FALSE) message("Calculating summary statistics for L outside 2SD of K") ss <- calculate_summary_stats(df_na_outside_2sd_k, "L", # formerly X_stats_BY_L_outside_2SD_K group_vars = c("conc_num", "conc_num_factor_factor")) - df_na_l_outside_2sd_k_stats <- ss$df_with_stats - write.csv(ss$summary_stats, + df_na_l_outside_2sd_k_stats <- ss$stats_joined + write.csv(ss$stats_csv, file = file.path(out_dir, "max_observed_L_vals_for_spots_outside_2SD_K.csv"), row.names = FALSE) plate_analysis_plot_configs <- generate_plate_analysis_plot_configs( variables = c("L", "K", "r", "AUC", "delta_bg"), df_before = df_stats, - df_after = df_na_stats_filtered + df_after = stats_na_filtered ) plate_analysis_boxplot_configs <- generate_plate_analysis_plot_configs( variables = c("L", "K", "r", "AUC", "delta_bg"), df_before = df_stats, - df_after = df_na_stats_filtered, + df_after = stats_na_filtered, plot_type = "box" ) @@ -1587,17 +1555,17 @@ main <- function() { filter(!is.na(L)) message("Calculating background summary statistics") - ss_bg <- calculate_summary_stats(df_bg, c("L", "K", "r", "AUC", "delta_bg"), # formerly X_stats_BY + ss <- calculate_summary_stats(df_bg, c("L", "K", "r", "AUC", "delta_bg"), # formerly X_stats_BY group_vars = c("OrfRep", "Drug", "conc_num", "conc_num_factor_factor")) - summary_stats_bg <- ss_bg$summary_stats - df_bg_stats <- ss_bg$df_with_stats + stats_background_csv <- ss$stats_csv + stats_background <- ss$stats_joined write.csv( - summary_stats_bg, + stats_background_csv, file = file.path(out_dir, paste0("summary_stats_background_strain_", strain, ".csv")), row.names = FALSE) message("Setting missing reference values to the highest theoretical value at each drug conc for L") - df_reference <- df_na_stats %>% # formerly X2_RF + df_reference <- stats_na %>% # formerly X2_RF filter(OrfRep == strain) %>% filter(!is.na(L)) %>% group_by(OrfRep, Drug, conc_num, conc_num_factor_factor) %>% @@ -1609,14 +1577,14 @@ main <- function() { ungroup() message("Calculating reference strain summary statistics") - df_reference_summary_stats <- calculate_summary_stats( # formerly X_stats_X2_RF + stats_reference <- calculate_summary_stats( # formerly X_stats_X2_RF df = df_reference, variables = c("L", "K", "r", "AUC"), group_vars = c("OrfRep", "Drug", "conc_num", "conc_num_factor_factor") - )$df_with_stats + )$stats_joined # Summarise statistics for error bars - df_reference_summary_stats <- df_reference_summary_stats %>% + stats_reference <- stats_reference %>% group_by(OrfRep, Drug, conc_num, conc_num_factor_factor) %>% mutate( mean_mean_L = first(mean_L), @@ -1631,26 +1599,27 @@ main <- function() { ) message("Calculating reference strain interaction summary statistics") # formerly X_stats_interaction - df_reference_interaction_stats <- calculate_summary_stats( + stats_interactions_reference <- calculate_summary_stats( df = df_reference, variables = c("L", "K", "r", "AUC"), group_vars = c("OrfRep", "Gene", "num", "Drug", "conc_num", "conc_num_factor_factor") - )$df_with_stats + )$stats_joined message("Calculating reference strain interaction scores") - reference_results <- calculate_interaction_scores(df_reference_interaction_stats, df_bg_stats, "reference") - df_reference_calculations <- reference_results$calculations - df_reference_interactions_joined <- reference_results$full_data - df_reference_interactions <- reference_results$interactions - write.csv(df_reference_calculations, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE) - write.csv(df_reference_interactions, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE) + reference_results <- calculate_interaction_scores(stats_interactions_reference, stats_background, "reference") + calculations_reference_csv <- reference_results$calculations_csv + interactions_reference_csv <- reference_results$interactions_csv + interactions_reference <- reference_results$interactions + interactions_reference_joined <- reference_results$interactions_joined + write.csv(calculations_reference_csv, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE) + write.csv(interactions_reference_csv, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE) - # message("Generating reference interaction plots") - # reference_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_reference_interactions_joined, "reference") - # generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16) + message("Generating reference interaction plots") + reference_plot_configs <- generate_interaction_plot_configs(stats_reference, interactions_reference_joined, "reference") + generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, page_width = 16, page_height = 16) message("Setting missing deletion values to the highest theoretical value at each drug conc for L") - df_deletion <- df_na_stats %>% # formerly X2 + deletion_adjusted <- stats_na %>% # formerly X2 filter(OrfRep != strain) %>% filter(!is.na(L)) %>% group_by(OrfRep, Gene, conc_num, conc_num_factor_factor) %>% @@ -1662,96 +1631,94 @@ main <- function() { ungroup() message("Calculating deletion strain(s) interaction summary statistics") - df_deletion_stats <- calculate_summary_stats( - df = df_deletion, + deletion_stats <- calculate_summary_stats( + df = deletion_adjusted, variables = c("L", "K", "r", "AUC"), group_vars = c("OrfRep", "Gene", "Drug", "conc_num", "conc_num_factor_factor") - )$df_with_stats + )$stats_joined message("Calculating deletion strain(s) interactions scores") - deletion_results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, "deletion") - df_calculations <- deletion_results$calculations - df_interactions <- deletion_results$interactions - df_interactions_joined <- deletion_results$full_data - write.csv(df_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE) - write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE) + deletion_results <- calculate_interaction_scores(deletion_stats, stats_background, "deletion") + calculations_csv <- deletion_results$calculations_csv + interactions_csv <- deletion_results$interactions_csv + interactions <- deletion_results$interactions + interactions_joined <- deletion_results$interactions_joined + interactions_narm <- deletion_results$interactions_narm + write.csv(calculations_csv, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE) + write.csv(interactions_csv, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE) - # message("Generating deletion interaction plots") - # deletion_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_interactions_joined, "deletion") - # generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs, page_width = 16, page_height = 16) + message("Generating deletion interaction plots") + deletion_plot_configs <- generate_interaction_plot_configs(stats_reference, interactions_joined, "deletion") + generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs, page_width = 16, page_height = 16) - # message("Writing enhancer/suppressor csv files") - # interaction_threshold <- 2 # TODO add to study config? - # enhancer_condition_L <- df_interactions$Avg_Zscore_L >= interaction_threshold - # suppressor_condition_L <- df_interactions$Avg_Zscore_L <= -interaction_threshold - # enhancer_condition_K <- df_interactions$Avg_Zscore_K >= interaction_threshold - # suppressor_condition_K <- df_interactions$Avg_Zscore_K <= -interaction_threshold - # enhancers_L <- df_interactions[enhancer_condition_L, ] - # suppressors_L <- df_interactions[suppressor_condition_L, ] - # enhancers_K <- df_interactions[enhancer_condition_K, ] - # suppressors_K <- df_interactions[suppressor_condition_K, ] - # enhancers_and_suppressors_L <- df_interactions[enhancer_condition_L | suppressor_condition_L, ] - # enhancers_and_suppressors_K <- df_interactions[enhancer_condition_K | suppressor_condition_K, ] - # write.csv(enhancers_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_L.csv"), row.names = FALSE) - # write.csv(suppressors_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_L.csv"), row.names = FALSE) - # write.csv(enhancers_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_K.csv"), row.names = FALSE) - # write.csv(suppressors_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_K.csv"), row.names = FALSE) - # write.csv(enhancers_and_suppressors_L, - # file = file.path(out_dir, "zscore_interactions_deletion_enhancers_and_suppressors_L.csv"), row.names = FALSE) - # write.csv(enhancers_and_suppressors_K, - # file = file.path(out_dir, "zscore_interaction_deletion_enhancers_and_suppressors_K.csv"), row.names = FALSE) + message("Writing enhancer/suppressor csv files") + interaction_threshold <- 2 # TODO add to study config? + enhancer_condition_L <- interactions$Avg_Zscore_L >= interaction_threshold + suppressor_condition_L <- interactions$Avg_Zscore_L <= -interaction_threshold + enhancer_condition_K <- interactions$Avg_Zscore_K >= interaction_threshold + suppressor_condition_K <- interactions$Avg_Zscore_K <= -interaction_threshold + enhancers_L <- interactions[enhancer_condition_L, ] + suppressors_L <- interactions[suppressor_condition_L, ] + enhancers_K <- interactions[enhancer_condition_K, ] + suppressors_K <- interactions[suppressor_condition_K, ] + enhancers_and_suppressors_L <- interactions[enhancer_condition_L | suppressor_condition_L, ] + enhancers_and_suppressors_K <- interactions[enhancer_condition_K | suppressor_condition_K, ] + write.csv(enhancers_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_L.csv"), row.names = FALSE) + write.csv(suppressors_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_L.csv"), row.names = FALSE) + write.csv(enhancers_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_K.csv"), row.names = FALSE) + write.csv(suppressors_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_K.csv"), row.names = FALSE) + write.csv(enhancers_and_suppressors_L, + file = file.path(out_dir, "zscore_interactions_deletion_enhancers_and_suppressors_L.csv"), row.names = FALSE) + write.csv(enhancers_and_suppressors_K, + file = file.path(out_dir, "zscore_interaction_deletion_enhancers_and_suppressors_K.csv"), row.names = FALSE) - # message("Writing linear model enhancer/suppressor csv files") - # lm_interaction_threshold <- 2 # TODO add to study config? - # enhancers_lm_L <- df_interactions[df_interactions$Z_lm_L >= lm_interaction_threshold, ] - # suppressors_lm_L <- df_interactions[df_interactions$Z_lm_L <= -lm_interaction_threshold, ] - # enhancers_lm_K <- df_interactions[df_interactions$Z_lm_K >= lm_interaction_threshold, ] - # suppressors_lm_K <- df_interactions[df_interactions$Z_lm_K <= -lm_interaction_threshold, ] - # write.csv(enhancers_lm_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_L.csv"), row.names = FALSE) - # write.csv(suppressors_lm_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_L.csv"), row.names = FALSE) - # write.csv(enhancers_lm_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_K.csv"), row.names = FALSE) - # write.csv(suppressors_lm_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_K.csv"), row.names = FALSE) + message("Writing linear model enhancer/suppressor csv files") + lm_interaction_threshold <- 2 # TODO add to study config? + enhancers_lm_L <- interactions[interactions$Z_lm_L >= lm_interaction_threshold, ] + suppressors_lm_L <- interactions[interactions$Z_lm_L <= -lm_interaction_threshold, ] + enhancers_lm_K <- interactions[interactions$Z_lm_K >= lm_interaction_threshold, ] + suppressors_lm_K <- interactions[interactions$Z_lm_K <= -lm_interaction_threshold, ] + write.csv(enhancers_lm_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_L.csv"), row.names = FALSE) + write.csv(suppressors_lm_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_L.csv"), row.names = FALSE) + write.csv(enhancers_lm_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_K.csv"), row.names = FALSE) + write.csv(suppressors_lm_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_K.csv"), row.names = FALSE) - # message("Generating rank plots") - # rank_plot_configs <- generate_rank_plot_configs( - # df_interactions, - # is_lm = FALSE, - # ) - # generate_and_save_plots(out_dir, "rank_plots", rank_plot_configs, - # page_width = 18, page_height = 12) + message("Generating rank plots") + rank_plot_configs <- generate_rank_plot_configs( + interactions, + is_lm = FALSE, + ) + generate_and_save_plots(out_dir, "rank_plots", rank_plot_configs, page_width = 18, page_height = 12) - # message("Generating ranked linear model plots") - # rank_lm_plot_configs <- generate_rank_plot_configs( - # df_interactions, - # is_lm = TRUE, - # ) - # generate_and_save_plots(out_dir, "rank_plots_lm", rank_lm_plot_configs, - # page_width = 18, page_height = 12) + message("Generating ranked linear model plots") + rank_lm_plot_configs <- generate_rank_plot_configs( + interactions, + is_lm = TRUE, + ) + generate_and_save_plots(out_dir, "rank_plots_lm", rank_lm_plot_configs, page_width = 18, page_height = 12) - # message("Generating overlapped ranked plots") - # rank_plot_filtered_configs <- generate_rank_plot_configs( - # df_interactions, - # is_lm = FALSE, - # filter_na = TRUE, - # overlap_color = TRUE - # ) - # generate_and_save_plots(out_dir, "rank_plots_na_rm", rank_plot_filtered_configs, - # page_width = 18, page_height = 12) + message("Generating overlapped ranked plots") + rank_plot_filtered_configs <- generate_rank_plot_configs( + interactions_narm, + is_lm = FALSE, + # filter_na = TRUE, + overlap_color = TRUE + ) + generate_and_save_plots(out_dir, "rank_plots_na_rm", rank_plot_filtered_configs, page_width = 18, page_height = 12) - # message("Generating overlapped ranked linear model plots") - # rank_plot_lm_filtered_configs <- generate_rank_plot_configs( - # df_interactions, - # is_lm = TRUE, - # filter_na = TRUE, - # overlap_color = TRUE - # ) - # generate_and_save_plots(out_dir, "rank_plots_lm_na_rm", rank_plot_lm_filtered_configs, - # page_width = 18, page_height = 12) + message("Generating overlapped ranked linear model plots") + rank_plot_lm_filtered_configs <- generate_rank_plot_configs( + interactions_narm, + is_lm = TRUE, + # filter_na = TRUE, + overlap_color = TRUE + ) + generate_and_save_plots(out_dir, "rank_plots_lm_na_rm", rank_plot_lm_filtered_configs, page_width = 18, page_height = 12) message("Generating correlation curve parameter pair plots") correlation_plot_configs <- generate_correlation_plot_configs( - df_interactions, - df_reference_interactions + interactions_narm, + interactions_reference ) generate_and_save_plots(out_dir, "correlation_cpps", correlation_plot_configs, page_width = 10, page_height = 7)