From aef0fba1da2400cb4b3ed31e763b37d8cf018370 Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Mon, 7 Oct 2024 18:05:23 -0400 Subject: [PATCH] Reorganize calculations to prevent column clobbering --- .../apps/r/calculate_interaction_zscores.R | 386 ++++++++---------- 1 file changed, 172 insertions(+), 214 deletions(-) diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 3641d882..21321304 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -265,7 +265,7 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2) df <- df %>% left_join(mean_zeroes, by = c(group_vars)) - # Calculate Raw Shifts and Z Shifts + # Calculate Raw Shifts and Z Shifts for all rows df <- df %>% mutate( Raw_Shift_L = mean_L_zero - WT_L, @@ -312,39 +312,6 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2) ) %>% ungroup() - calculations <- calculations %>% - group_by(across(all_of(group_vars))) %>% - mutate( - # Apply the simple LM function for each variable - lm_L = list(perform_lm(Delta_L, conc_num_factor, max_conc)), - lm_K = list(perform_lm(Delta_K, conc_num_factor, max_conc)), - lm_r = list(perform_lm(Delta_r, conc_num_factor, max_conc)), - lm_AUC = list(perform_lm(Delta_AUC, conc_num_factor, max_conc)), - - # Extract coefficients and statistics for each model - lm_intercept_L = lm_L[[1]]$intercept, - lm_slope_L = lm_L[[1]]$slope, - R_Squared_L = lm_L[[1]]$r_squared, - lm_Score_L = lm_L[[1]]$score, - - lm_intercept_K = lm_K[[1]]$intercept, - lm_slope_K = lm_K[[1]]$slope, - R_Squared_K = lm_K[[1]]$r_squared, - lm_Score_K = lm_K[[1]]$score, - - lm_intercept_r = lm_r[[1]]$intercept, - lm_slope_r = lm_r[[1]]$slope, - R_Squared_r = lm_r[[1]]$r_squared, - lm_Score_r = lm_r[[1]]$score, - - lm_intercept_AUC = lm_AUC[[1]]$intercept, - lm_slope_AUC = lm_AUC[[1]]$slope, - R_Squared_AUC = 
lm_AUC[[1]]$r_squared, - lm_Score_AUC = lm_AUC[[1]]$score - ) %>% - select(-lm_L, -lm_K, -lm_r, -lm_AUC) %>% - ungroup() - # For interaction plot error bars delta_means_sds <- calculations %>% group_by(across(all_of(group_vars))) %>% @@ -363,8 +330,69 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2) calculations <- calculations %>% left_join(delta_means_sds, by = group_vars) + # Calculate group-specific interactions + interactions <- calculations %>% + group_by(across(all_of(group_vars))) %>% + summarise( + NG_sum_int = sum(NG), + DB_sum_int = sum(DB), + SM_sum_int = sum(SM), + num_non_removed_concs = total_conc_num - sum(DB, na.rm = TRUE) - 1, + + # Add background data + Raw_Shift_L = first(Raw_Shift_L), + Raw_Shift_K = first(Raw_Shift_K), + Raw_Shift_r = first(Raw_Shift_r), + Raw_Shift_AUC = first(Raw_Shift_AUC), + Z_Shift_L = first(Z_Shift_L), + Z_Shift_K = first(Z_Shift_K), + Z_Shift_r = first(Z_Shift_r), + Z_Shift_AUC = first(Z_Shift_AUC), + + # Sum Z-scores + Sum_Z_Score_L = sum(Zscore_L, na.rm = TRUE), + Sum_Z_Score_K = sum(Zscore_K, na.rm = TRUE), + Sum_Z_Score_r = sum(Zscore_r, na.rm = TRUE), + Sum_Z_Score_AUC = sum(Zscore_AUC, na.rm = TRUE), + + # We sum twice but it saves on creating another block + # TODO should we use mean() here, not sure + Avg_Zscore_L = sum(Zscore_L, na.rm = TRUE) / first(num_non_removed_concs), + Avg_Zscore_K = sum(Zscore_K, na.rm = TRUE) / first(num_non_removed_concs), + Avg_Zscore_r = sum(Zscore_r, na.rm = TRUE) / first(total_conc_num - 1), + Avg_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE) / first(total_conc_num - 1), + + # Perform gene-gene linear modeling + lm_L = list(perform_lm(Delta_L, conc_num_factor, max_conc)), + lm_K = list(perform_lm(Delta_K, conc_num_factor, max_conc)), + lm_r = list(perform_lm(Delta_r, conc_num_factor, max_conc)), + lm_AUC = list(perform_lm(Delta_AUC, conc_num_factor, max_conc)), + + # Extract coefficients and statistics for each model + lm_intercept_L = 
lm_L[[1]]$intercept, + lm_slope_L = lm_L[[1]]$slope, + R_Squared_L = lm_L[[1]]$r_squared, + lm_Score_L = lm_L[[1]]$score, + lm_intercept_K = lm_K[[1]]$intercept, + lm_slope_K = lm_K[[1]]$slope, + R_Squared_K = lm_K[[1]]$r_squared, + lm_Score_K = lm_K[[1]]$score, + lm_intercept_r = lm_r[[1]]$intercept, + lm_slope_r = lm_r[[1]]$slope, + R_Squared_r = lm_r[[1]]$r_squared, + lm_Score_r = lm_r[[1]]$score, + lm_intercept_AUC = lm_AUC[[1]]$intercept, + lm_slope_AUC = lm_AUC[[1]]$slope, + R_Squared_AUC = lm_AUC[[1]]$r_squared, + lm_Score_AUC = lm_AUC[[1]]$score, + + .groups = "drop" + ) %>% + select(-c(lm_L, lm_K, lm_r, lm_AUC)) # drop linear models since we have coefficients + # Summary statistics for lm scores - calculations <- calculations %>% + interactions <- interactions %>% + # group_by(across(all_of(group_vars))) %>% mutate( lm_mean_L = mean(lm_Score_L, na.rm = TRUE), lm_sd_L = sd(lm_Score_L, na.rm = TRUE), @@ -378,60 +406,6 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2) Z_lm_K = (lm_Score_K - lm_mean_K) / lm_sd_K, Z_lm_r = (lm_Score_r - lm_mean_r) / lm_sd_r, Z_lm_AUC = (lm_Score_AUC - lm_mean_AUC) / lm_sd_AUC - ) - - # Build summary stats (interactions) - interactions <- calculations %>% - group_by(across(all_of(group_vars))) %>% - summarise( - - num_non_removed_concs = total_conc_num - sum(DB, na.rm = TRUE) - 1, - - Sum_Z_Score_L = sum(Zscore_L, na.rm = TRUE), - Sum_Z_Score_K = sum(Zscore_K, na.rm = TRUE), - Sum_Z_Score_r = sum(Zscore_r, na.rm = TRUE), - Sum_Z_Score_AUC = sum(Zscore_AUC, na.rm = TRUE), - - Avg_Zscore_L = Sum_Z_Score_L / first(num_non_removed_concs), - Avg_Zscore_K = Sum_Z_Score_K / first(num_non_removed_concs), - Avg_Zscore_r = Sum_Z_Score_r / first(num_non_removed_concs), - Avg_Zscore_AUC = Sum_Z_Score_AUC / first(num_non_removed_concs), - - # R_Squared - R_Squared_L = first(R_Squared_L), - R_Squared_K = first(R_Squared_K), - R_Squared_r = first(R_Squared_r), - R_Squared_AUC = first(R_Squared_AUC), - - # 
Interaction Z-scores - Z_lm_L = first(Z_lm_L), - Z_lm_K = first(Z_lm_K), - Z_lm_r = first(Z_lm_r), - Z_lm_AUC = first(Z_lm_AUC), - - # Raw Shifts - Raw_Shift_L = first(Raw_Shift_L), - Raw_Shift_K = first(Raw_Shift_K), - Raw_Shift_r = first(Raw_Shift_r), - Raw_Shift_AUC = first(Raw_Shift_AUC), - - # Z Shifts - Z_Shift_L = first(Z_Shift_L), - Z_Shift_K = first(Z_Shift_K), - Z_Shift_r = first(Z_Shift_r), - Z_Shift_AUC = first(Z_Shift_AUC), - - # Gene-Gene Interaction - lm_Score_L = first(lm_Score_L), - lm_Score_K = first(lm_Score_K), - lm_Score_r = first(lm_Score_r), - lm_Score_AUC = first(lm_Score_AUC), - - # NG, DB, SM values - NG_sum_int = sum(NG), - DB_sum_int = sum(DB), - SM_sum_int = sum(SM), - .groups = "drop" ) %>% arrange(desc(Z_lm_L), desc(NG_sum_int)) @@ -446,9 +420,7 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2) Z_lm_L_adjusted = ifelse(is.na(Z_lm_L), 0.001, Z_lm_L), Z_lm_K_adjusted = ifelse(is.na(Z_lm_K), 0.001, Z_lm_K), Z_lm_r_adjusted = ifelse(is.na(Z_lm_r), 0.001, Z_lm_r), - Z_lm_AUC_adjusted = ifelse(is.na(Z_lm_AUC), 0.001, Z_lm_AUC) - ) %>% - mutate( + Z_lm_AUC_adjusted = ifelse(is.na(Z_lm_AUC), 0.001, Z_lm_AUC), Rank_L = rank(Avg_Zscore_L_adjusted), Rank_K = rank(Avg_Zscore_K_adjusted), Rank_r = rank(Avg_Zscore_r_adjusted), @@ -456,13 +428,21 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2) Rank_lm_L = rank(Z_lm_L_adjusted), Rank_lm_K = rank(Z_lm_K_adjusted), Rank_lm_r = rank(Z_lm_r_adjusted), - Rank_lm_AUC = rank(Z_lm_AUC_adjusted) - ) %>% - mutate( - lm_R_squared_rank_L = summary(lm(Rank_lm_L ~ Rank_L, data = .))$r.squared, - lm_R_squared_rank_K = summary(lm(Rank_lm_K ~ Rank_K, data = .))$r.squared, - lm_R_squared_rank_r = summary(lm(Rank_lm_r ~ Rank_r, data = .))$r.squared, - lm_R_squared_rank_AUC = summary(lm(Rank_lm_AUC ~ Rank_AUC, data = .))$r.squared + Rank_lm_AUC = rank(Z_lm_AUC_adjusted), + Rank_lm_model_L = list(perform_lm(Rank_lm_L, Rank_L, max_conc)), + Rank_lm_model_K =
list(perform_lm(Rank_lm_K, Rank_K, max_conc)), + Rank_lm_model_r = list(perform_lm(Rank_lm_r, Rank_r, max_conc)), + Rank_lm_model_AUC = list(perform_lm(Rank_lm_AUC, Rank_AUC, max_conc)), + Correlation_lm_L = list(perform_lm(Z_lm_L, Avg_Zscore_L, max_conc)), + Correlation_lm_K = list(perform_lm(Z_lm_K, Avg_Zscore_K, max_conc)), + Correlation_lm_r = list(perform_lm(Z_lm_r, Avg_Zscore_r, max_conc)), + Correlation_lm_AUC = list(perform_lm(Z_lm_AUC, Avg_Zscore_AUC, max_conc)), + Correlation_lm_K_L = list(perform_lm(Z_lm_K, Z_lm_L, max_conc)), + Correlation_lm_r_L = list(perform_lm(Z_lm_r, Z_lm_L, max_conc)), + Correlation_lm_AUC_L = list(perform_lm(Z_lm_AUC, Z_lm_L, max_conc)), + Correlation_lm_r_K = list(perform_lm(Z_lm_r, Z_lm_K, max_conc)), + Correlation_lm_AUC_K = list(perform_lm(Z_lm_AUC, Z_lm_K, max_conc)), + Correlation_lm_AUC_r = list(perform_lm(Z_lm_AUC, Z_lm_r, max_conc)) ) # Add overlap threshold categories based on Z-lm and Avg-Z scores @@ -480,122 +460,101 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2) Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Zscore", TRUE ~ "No Effect" ), - # Apply the perform_lm function for each variable pair - lm_L = list(perform_lm(Z_lm_L, Avg_Zscore_L, max_conc)), - lm_K = list(perform_lm(Z_lm_K, Avg_Zscore_K, max_conc)), - lm_r = list(perform_lm(Z_lm_r, Avg_Zscore_r, max_conc)), - lm_AUC = list(perform_lm(Z_lm_AUC, Avg_Zscore_AUC, max_conc)), - # Correlation models for various pairs - Z_lm_K_L = list(perform_lm(Z_lm_K, Z_lm_L, max_conc)), - Z_lm_r_L = list(perform_lm(Z_lm_r, Z_lm_L, max_conc)), - Z_lm_AUC_L = list(perform_lm(Z_lm_AUC, Z_lm_L, max_conc)), - Z_lm_r_K = list(perform_lm(Z_lm_r, Z_lm_K, max_conc)), - Z_lm_AUC_K = list(perform_lm(Z_lm_AUC, Z_lm_K, max_conc)), - Z_lm_AUC_r = list(perform_lm(Z_lm_AUC, Z_lm_r, max_conc)), + Rank_lm_R_squared_L = Rank_lm_model_L[[1]]$r_squared, + Rank_lm_R_squared_K = Rank_lm_model_K[[1]]$r_squared, +
Rank_lm_R_squared_r = Rank_lm_model_r[[1]]$r_squared, + Rank_lm_R_squared_AUC = Rank_lm_model_AUC[[1]]$r_squared, + Correlation_lm_intercept_L = Correlation_lm_L[[1]]$intercept, + Correlation_lm_slope_L = Correlation_lm_L[[1]]$slope, + Correlation_lm_R_squared_L = Correlation_lm_L[[1]]$r_squared, + Correlation_lm_Score_L = Correlation_lm_L[[1]]$score, + Correlation_lm_intercept_K = Correlation_lm_K[[1]]$intercept, + Correlation_lm_slope_K = Correlation_lm_K[[1]]$slope, + Correlation_lm_R_squared_K = Correlation_lm_K[[1]]$r_squared, + Correlation_lm_Score_K = Correlation_lm_K[[1]]$score, + Correlation_lm_intercept_r = Correlation_lm_r[[1]]$intercept, + Correlation_lm_slope_r = Correlation_lm_r[[1]]$slope, + Correlation_lm_R_squared_r = Correlation_lm_r[[1]]$r_squared, + Correlation_lm_Score_r = Correlation_lm_r[[1]]$score, + Correlation_lm_intercept_AUC = Correlation_lm_AUC[[1]]$intercept, + Correlation_lm_slope_AUC = Correlation_lm_AUC[[1]]$slope, + Correlation_lm_R_squared_AUC = Correlation_lm_AUC[[1]]$r_squared, + Correlation_lm_Score_AUC = Correlation_lm_AUC[[1]]$score, + Correlation_lm_intercept_K_L = Correlation_lm_K_L[[1]]$intercept, + Correlation_lm_slope_K_L = Correlation_lm_K_L[[1]]$slope, + Correlation_lm_R_squared_K_L = Correlation_lm_K_L[[1]]$r_squared, + Correlation_lm_Score_K_L = Correlation_lm_K_L[[1]]$score, + Correlation_lm_intercept_r_L = Correlation_lm_r_L[[1]]$intercept, + Correlation_lm_slope_r_L = Correlation_lm_r_L[[1]]$slope, + Correlation_lm_R_squared_r_L = Correlation_lm_r_L[[1]]$r_squared, + Correlation_lm_Score_r_L = Correlation_lm_r_L[[1]]$score, + Correlation_lm_intercept_AUC_L = Correlation_lm_AUC_L[[1]]$intercept, + Correlation_lm_slope_AUC_L = Correlation_lm_AUC_L[[1]]$slope, + Correlation_lm_R_squared_AUC_L = Correlation_lm_AUC_L[[1]]$r_squared, + Correlation_lm_Score_AUC_L = Correlation_lm_AUC_L[[1]]$score, + Correlation_lm_intercept_r_K = Correlation_lm_r_K[[1]]$intercept, + Correlation_lm_slope_r_K = Correlation_lm_r_K[[1]]$slope, +
Correlation_lm_R_squared_r_K = Correlation_lm_r_K[[1]]$r_squared, + Correlation_lm_Score_r_K = Correlation_lm_r_K[[1]]$score, + Correlation_lm_intercept_AUC_K = Correlation_lm_AUC_K[[1]]$intercept, + Correlation_lm_slope_AUC_K = Correlation_lm_AUC_K[[1]]$slope, + Correlation_lm_R_squared_AUC_K = Correlation_lm_AUC_K[[1]]$r_squared, + Correlation_lm_Score_AUC_K = Correlation_lm_AUC_K[[1]]$score, + Correlation_lm_intercept_AUC_r = Correlation_lm_AUC_r[[1]]$intercept, + Correlation_lm_slope_AUC_r = Correlation_lm_AUC_r[[1]]$slope, + Correlation_lm_R_squared_AUC_r = Correlation_lm_AUC_r[[1]]$r_squared, + Correlation_lm_Score_AUC_r = Correlation_lm_AUC_r[[1]]$score + ) + } - # Extract coefficients and statistics for each model - lm_rank_intercept_L = lm_L[[1]]$intercept, - lm_rank_slope_L = lm_L[[1]]$slope, - R_Squared_L = lm_L[[1]]$r_squared, - lm_Score_L = lm_L[[1]]$score, - - lm_intercept_K = lm_K[[1]]$intercept, - lm_slope_K = lm_K[[1]]$slope, - R_Squared_K = lm_K[[1]]$r_squared, - lm_Score_K = lm_K[[1]]$score, - - lm_intercept_r = lm_r[[1]]$intercept, - lm_slope_r = lm_r[[1]]$slope, - R_Squared_r = lm_r[[1]]$r_squared, - lm_Score_r = lm_r[[1]]$score, - - lm_intercept_AUC = lm_AUC[[1]]$intercept, - lm_slope_AUC = lm_AUC[[1]]$slope, - R_Squared_AUC = lm_AUC[[1]]$r_squared, - lm_Score_AUC = lm_AUC[[1]]$score, - - Z_lm_intercept_K_L = Z_lm_K_L[[1]]$intercept, - Z_lm_slope_K_L = Z_lm_K_L[[1]]$slope, - Z_lm_R_squared_K_L = Z_lm_K_L[[1]]$r_squared, - Z_lm_Score_K_L = Z_lm_K_L[[1]]$score, - - Z_lm_intercept_r_L = Z_lm_r_L[[1]]$intercept, - Z_lm_slope_r_L = Z_lm_r_L[[1]]$slope, - Z_lm_R_squared_r_L = Z_lm_r_L[[1]]$r_squared, - Z_lm_Score_r_L = Z_lm_r_L[[1]]$score, - - Z_lm_intercept_AUC_L = Z_lm_AUC_L[[1]]$intercept, - Z_lm_slope_AUC_L = Z_lm_AUC_L[[1]]$slope, - Z_lm_R_squared_AUC_L = Z_lm_AUC_L[[1]]$r_squared, - Z_lm_Score_AUC_L = Z_lm_AUC_L[[1]]$score, - - Z_lm_intercept_r_K = Z_lm_r_K[[1]]$intercept, - Z_lm_slope_r_K = Z_lm_r_K[[1]]$slope, - Z_lm_R_squared_r_K = 
Z_lm_r_K[[1]]$r_squared, - Z_lm_Score_r_K = Z_lm_r_K[[1]]$score, - - Z_lm_intercept_AUC_K = Z_lm_AUC_K[[1]]$intercept, - Z_lm_slope_AUC_K = Z_lm_AUC_K[[1]]$slope, - Z_lm_R_squared_AUC_K = Z_lm_AUC_K[[1]]$r_squared, - Z_lm_Score_AUC_K = Z_lm_AUC_K[[1]]$score, - - Z_lm_intercept_AUC_r = Z_lm_AUC_r[[1]]$intercept, - Z_lm_slope_AUC_r = Z_lm_AUC_r[[1]]$slope, - Z_lm_R_squared_AUC_r = Z_lm_AUC_r[[1]]$r_squared, - Z_lm_Score_AUC_r = Z_lm_AUC_r[[1]]$score - ) %>% - select( - -lm_L, -lm_K, -lm_r, -lm_AUC, - -Z_lm_K_L, -Z_lm_r_L, -Z_lm_AUC_L, -Z_lm_r_K, -Z_lm_AUC_K, -Z_lm_AUC_r) - } # end deletion-specific block - - # Create the final calculations and interactions dataframes with only required columns for csv output + # Create the final calculations and interactions dataframes with specific columns for csv output + # Trying to mimic original output data df_calculations <- calculations %>% - select( - all_of(group_vars), - conc_num, conc_num_factor, conc_num_factor_factor, N, - mean_L, median_L, sd_L, se_L, - mean_K, median_K, sd_K, se_K, - mean_r, median_r, sd_r, se_r, - mean_AUC, median_AUC, sd_AUC, se_AUC, - Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC, - Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC, - WT_L, WT_K, WT_r, WT_AUC, - WT_sd_L, WT_sd_K, WT_sd_r, WT_sd_AUC, - Exp_L, Exp_K, Exp_r, Exp_AUC, - Delta_L, Delta_K, Delta_r, Delta_AUC, - mean_Delta_L, mean_Delta_K, mean_Delta_r, mean_Delta_AUC, - Zscore_L, Zscore_K, Zscore_r, Zscore_AUC, - NG_sum, DB_sum, SM_sum - ) %>% - rename(NG = NG_sum, DB = DB_sum, SM = SM_sum) + select(all_of(c( + group_vars, # necessary for full_data left_join + "conc_num", "conc_num_factor", "conc_num_factor_factor", "N", + "mean_L", "median_L", "sd_L", "se_L", + "mean_K", "median_K", "sd_K", "se_K", + "mean_r", "median_r", "sd_r", "se_r", + "mean_AUC", "median_AUC", "sd_AUC", "se_AUC", + "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC", + "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC", + "WT_L", "WT_K", "WT_r", 
"WT_AUC", + "WT_sd_L", "WT_sd_K", "WT_sd_r", "WT_sd_AUC", + "Exp_L", "Exp_K", "Exp_r", "Exp_AUC", + "Delta_L", "Delta_K", "Delta_r", "Delta_AUC", + "mean_Delta_L", "mean_Delta_K", "mean_Delta_r", "mean_Delta_AUC", + "Zscore_L", "Zscore_K", "Zscore_r", "Zscore_AUC", + "NG_sum", "DB_sum", "SM_sum" + ))) %>% + rename(NG = NG_sum, DB = DB_sum, SM = SM_sum) df_interactions <- interactions %>% - select( - any_of(c( - group_vars, - "Avg_Zscore_L", "Avg_Zscore_K", "Avg_Zscore_r", "Avg_Zscore_AUC", - "Sum_Z_Score_L", "Sum_Z_Score_K", "Sum_Z_Score_r", "Sum_Z_Score_AUC", - "Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC", - "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC", - "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC", - "lm_Score_L", "lm_Score_K", "lm_Score_r", "lm_Score_AUC", - "R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC", - "NG_sum_int", "DB_sum_int", "SM_sum_int", - "Z_lm_intercept_L", "Z_lm_slope_L", "Z_lm_R_squared_L", "Z_lm_Score_L", - "Z_lm_intercept_K", "Z_lm_slope_K", "Z_lm_R_squared_K", "Z_lm_Score_K", - "Z_lm_intercept_r", "Z_lm_slope_r", "Z_lm_R_squared_r", "Z_lm_Score_r", - "Z_lm_intercept_AUC", "Z_lm_slope_AUC", "Z_lm_R_squared_AUC", "Z_lm_Score_AUC", - "Z_lm_intercept_K_L", "Z_lm_slope_K_L", "Z_lm_R_squared_K_L", "Z_lm_Score_K_L", - "Z_lm_intercept_r_L", "Z_lm_slope_r_L", "Z_lm_R_squared_r_L", "Z_lm_Score_r_L", - "Z_lm_intercept_AUC_L", "Z_lm_slope_AUC_L", "Z_lm_R_squared_AUC_L", "Z_lm_Score_AUC_L", - "Z_lm_intercept_r_K", "Z_lm_slope_r_K", "Z_lm_R_squared_r_K", "Z_lm_Score_r_K", - "Z_lm_intercept_AUC_K", "Z_lm_slope_AUC_K", "Z_lm_R_squared_AUC_K", "Z_lm_Score_AUC_K", - "Z_lm_intercept_AUC_r", "Z_lm_slope_AUC_r", "Z_lm_R_squared_AUC_r", "Z_lm_Score_AUC_r" - )) - ) %>% - rename(NG = NG_sum_int, DB = DB_sum_int, SM = SM_sum_int) + select(any_of(c( + group_vars, # necessary for full_data left_join + "Avg_Zscore_L", "Avg_Zscore_K", "Avg_Zscore_r", "Avg_Zscore_AUC", + "Sum_Z_Score_L", "Sum_Z_Score_K", "Sum_Z_Score_r", "Sum_Z_Score_AUC", 
+ "Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC", + "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC", + "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC", + "lm_Score_L", "lm_Score_K", "lm_Score_r", "lm_Score_AUC", + "R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC", + "NG_sum_int", "DB_sum_int", "SM_sum_int", + "Rank_lm_R_squared_L", "Rank_lm_R_squared_K", "Rank_lm_R_squared_r", "Rank_lm_R_squared_AUC", + "Correlation_lm_intercept_L", "Correlation_lm_slope_L", "Correlation_lm_R_squared_L", "Correlation_lm_Score_L", + "Correlation_lm_intercept_K", "Correlation_lm_slope_K", "Correlation_lm_R_squared_K", "Correlation_lm_Score_K", + "Correlation_lm_intercept_r", "Correlation_lm_slope_r", "Correlation_lm_R_squared_r", "Correlation_lm_Score_r", + "Correlation_lm_intercept_AUC", "Correlation_lm_slope_AUC", "Correlation_lm_R_squared_AUC", "Correlation_lm_Score_AUC", + "Correlation_lm_intercept_K_L", "Correlation_lm_slope_K_L", "Correlation_lm_R_squared_K_L", "Correlation_lm_Score_K_L", + "Correlation_lm_intercept_r_L", "Correlation_lm_slope_r_L", "Correlation_lm_R_squared_r_L", "Correlation_lm_Score_r_L", + "Correlation_lm_intercept_AUC_L", "Correlation_lm_slope_AUC_L", "Correlation_lm_R_squared_AUC_L", "Correlation_lm_Score_AUC_L", + "Correlation_lm_intercept_r_K", "Correlation_lm_slope_r_K", "Correlation_lm_R_squared_r_K", "Correlation_lm_Score_r_K", + "Correlation_lm_intercept_AUC_K", "Correlation_lm_slope_AUC_K", "Correlation_lm_R_squared_AUC_K", "Correlation_lm_Score_AUC_K", + "Correlation_lm_intercept_AUC_r", "Correlation_lm_slope_AUC_r", "Correlation_lm_R_squared_AUC_r", "Correlation_lm_Score_AUC_r", + "Overlap" + ))) %>% + rename(NG = NG_sum_int, DB = DB_sum_int, SM = SM_sum_int) # Avoid column collision on left join for overlapping variables calculations_no_overlap <- calculations %>% @@ -1306,12 +1265,11 @@ generate_correlation_plot_configs <- function(df, df_reference) { x_var <- paste0("Z_lm_", rel$x) y_var <- paste0("Z_lm_", rel$y) - 
intercept <- df[[paste0("Z_lm_intercept_", rel$y, "_", rel$x)]][1] - slope <- df[[paste0("Z_lm_slope_", rel$y, "_", rel$x)]][1] - r_squared <- df[[paste0("Z_lm_R_squared_", rel$y, "_", rel$x)]][1] + intercept <- df[[paste0("Correlation_lm_intercept_", rel$y, "_", rel$x)]][1] + slope <- df[[paste0("Correlation_lm_slope_", rel$y, "_", rel$x)]][1] + r_squared <- df[[paste0("Correlation_lm_R_squared_", rel$y, "_", rel$x)]][1] r_squared_rounded <- round(r_squared, 4) r_squared_label <- paste("R-squared =", r_squared_rounded) - xmin <- min(c(min(df[[x_var]]), min(df_reference[[x_var]]))) xmax <- max(c(max(df[[x_var]]), max(df_reference[[x_var]])))