Reorganize calculations to prevent column clobbering

2024-10-07 18:05:23 -04:00
parent a23565fad4
commit aef0fba1da
1 changed files with 172 additions and 214 deletions
--- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R
+++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R
@@ -265,7 +265,7 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
  df <- df %>%
    left_join(mean_zeroes, by = c(group_vars))
    
-  # Calculate Raw Shifts and Z Shifts
+  # Calculate Raw Shifts and Z Shifts for all rows
  df <- df %>%
    mutate(
      Raw_Shift_L = mean_L_zero - WT_L,
@@ -312,39 +312,6 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
    ) %>%
    ungroup()
    
-  calculations <- calculations %>%
-    group_by(across(all_of(group_vars))) %>%
-    mutate(
-      # Apply the simple LM function for each variable
-      lm_L = list(perform_lm(Delta_L, conc_num_factor, max_conc)),
-      lm_K = list(perform_lm(Delta_K, conc_num_factor, max_conc)),
-      lm_r = list(perform_lm(Delta_r, conc_num_factor, max_conc)),
-      lm_AUC = list(perform_lm(Delta_AUC, conc_num_factor, max_conc)),
-
-      # Extract coefficients and statistics for each model
-      lm_intercept_L = lm_L[[1]]$intercept,
-      lm_slope_L = lm_L[[1]]$slope,
-      R_Squared_L = lm_L[[1]]$r_squared,
-      lm_Score_L = lm_L[[1]]$score,
-
-      lm_intercept_K = lm_K[[1]]$intercept,
-      lm_slope_K = lm_K[[1]]$slope,
-      R_Squared_K = lm_K[[1]]$r_squared,
-      lm_Score_K = lm_K[[1]]$score,
-
-      lm_intercept_r = lm_r[[1]]$intercept,
-      lm_slope_r = lm_r[[1]]$slope,
-      R_Squared_r = lm_r[[1]]$r_squared,
-      lm_Score_r = lm_r[[1]]$score,
-
-      lm_intercept_AUC = lm_AUC[[1]]$intercept,
-      lm_slope_AUC = lm_AUC[[1]]$slope,
-      R_Squared_AUC = lm_AUC[[1]]$r_squared,
-      lm_Score_AUC = lm_AUC[[1]]$score
-      ) %>%
-      select(-lm_L, -lm_K, -lm_r, -lm_AUC) %>%
-      ungroup()
-
  # For interaction plot error bars
  delta_means_sds <- calculations %>%
    group_by(across(all_of(group_vars))) %>%
@@ -363,8 +330,69 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
  calculations <- calculations %>%
    left_join(delta_means_sds, by = group_vars)

+  # Calculate group-specific interactions
+  interactions <- calculations %>%
+    group_by(across(all_of(group_vars))) %>%
+    summarise(
+      NG_sum_int = sum(NG),
+      DB_sum_int = sum(DB),
+      SM_sum_int = sum(SM),
+      num_non_removed_concs = total_conc_num - sum(DB, na.rm = TRUE) - 1,
+
+      # Add background data
+      Raw_Shift_L = first(Raw_Shift_L),
+      Raw_Shift_K = first(Raw_Shift_K),
+      Raw_Shift_r = first(Raw_Shift_r),
+      Raw_Shift_AUC = first(Raw_Shift_AUC),
+      Z_Shift_L = first(Z_Shift_L),
+      Z_Shift_K = first(Z_Shift_K),
+      Z_Shift_r = first(Z_Shift_r),
+      Z_Shift_AUC = first(Z_Shift_AUC),
+
+      # Sum Z-scores
+      Sum_Z_Score_L = sum(Zscore_L, na.rm = TRUE),
+      Sum_Z_Score_K = sum(Zscore_K, na.rm = TRUE),
+      Sum_Z_Score_r = sum(Zscore_r, na.rm = TRUE),
+      Sum_Z_Score_AUC = sum(Zscore_AUC, na.rm = TRUE),
+
+      # The Z-score sums are recomputed here (not reused from Sum_Z_Score_*) to avoid another block
+      # TODO: decide whether mean() should be used here instead of sum() / count
+      Avg_Zscore_L = sum(Zscore_L, na.rm = TRUE) / first(num_non_removed_concs),
+      Avg_Zscore_K = sum(Zscore_K, na.rm = TRUE) / first(num_non_removed_concs),
+      Avg_Zscore_r = sum(Zscore_r, na.rm = TRUE) / first(total_conc_num - 1),
+      Avg_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE) / first(total_conc_num - 1),
+
+      # Perform gene-gene linear modeling
+      lm_L = list(perform_lm(Delta_L, conc_num_factor, max_conc)),
+      lm_K = list(perform_lm(Delta_K, conc_num_factor, max_conc)),
+      lm_r = list(perform_lm(Delta_r, conc_num_factor, max_conc)),
+      lm_AUC = list(perform_lm(Delta_AUC, conc_num_factor, max_conc)),
+
+      # Extract coefficients and statistics for each model
+      lm_intercept_L = lm_L[[1]]$intercept,
+      lm_slope_L = lm_L[[1]]$slope,
+      R_Squared_L = lm_L[[1]]$r_squared,
+      lm_Score_L = lm_L[[1]]$score,
+      lm_intercept_K = lm_K[[1]]$intercept,
+      lm_slope_K = lm_K[[1]]$slope,
+      R_Squared_K = lm_K[[1]]$r_squared,
+      lm_Score_K = lm_K[[1]]$score,
+      lm_intercept_r = lm_r[[1]]$intercept,
+      lm_slope_r = lm_r[[1]]$slope,
+      R_Squared_r = lm_r[[1]]$r_squared,
+      lm_Score_r = lm_r[[1]]$score,
+      lm_intercept_AUC = lm_AUC[[1]]$intercept,
+      lm_slope_AUC = lm_AUC[[1]]$slope,
+      R_Squared_AUC = lm_AUC[[1]]$r_squared,
+      lm_Score_AUC = lm_AUC[[1]]$score,
+
+      .groups = "drop"
+    ) %>%
+    select(-c(lm_L, lm_K, lm_r, lm_AUC)) # drop linear models since we have coefficients
+
  # Summary statistics for lm scores
-  calculations <- calculations %>%
+  interactions <- interactions %>%
+    # group_by(across(all_of(group_vars))) %>%
    mutate(
      lm_mean_L = mean(lm_Score_L, na.rm = TRUE),
      lm_sd_L = sd(lm_Score_L, na.rm = TRUE),
@@ -378,60 +406,6 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
      Z_lm_K = (lm_Score_K - lm_mean_K) / lm_sd_K,
      Z_lm_r = (lm_Score_r - lm_mean_r) / lm_sd_r,
      Z_lm_AUC = (lm_Score_AUC - lm_mean_AUC) / lm_sd_AUC
-    )
-
-  # Build summary stats (interactions)
-  interactions <- calculations %>%
-    group_by(across(all_of(group_vars))) %>%
-    summarise(
-
-      num_non_removed_concs = total_conc_num - sum(DB, na.rm = TRUE) - 1,
-
-      Sum_Z_Score_L = sum(Zscore_L, na.rm = TRUE),
-      Sum_Z_Score_K = sum(Zscore_K, na.rm = TRUE),
-      Sum_Z_Score_r = sum(Zscore_r, na.rm = TRUE),
-      Sum_Z_Score_AUC = sum(Zscore_AUC, na.rm = TRUE),
-
-      Avg_Zscore_L = Sum_Z_Score_L / first(num_non_removed_concs),
-      Avg_Zscore_K = Sum_Z_Score_K / first(num_non_removed_concs),
-      Avg_Zscore_r = Sum_Z_Score_r / first(num_non_removed_concs),
-      Avg_Zscore_AUC = Sum_Z_Score_AUC / first(num_non_removed_concs),
-
-      # R_Squared
-      R_Squared_L = first(R_Squared_L),
-      R_Squared_K = first(R_Squared_K),
-      R_Squared_r = first(R_Squared_r),
-      R_Squared_AUC = first(R_Squared_AUC),
-      
-      # Interaction Z-scores
-      Z_lm_L = first(Z_lm_L),
-      Z_lm_K = first(Z_lm_K),
-      Z_lm_r = first(Z_lm_r),
-      Z_lm_AUC = first(Z_lm_AUC),
-      
-      # Raw Shifts
-      Raw_Shift_L = first(Raw_Shift_L),
-      Raw_Shift_K = first(Raw_Shift_K),
-      Raw_Shift_r = first(Raw_Shift_r),
-      Raw_Shift_AUC = first(Raw_Shift_AUC),
-      
-      # Z Shifts
-      Z_Shift_L = first(Z_Shift_L),
-      Z_Shift_K = first(Z_Shift_K),
-      Z_Shift_r = first(Z_Shift_r),
-      Z_Shift_AUC = first(Z_Shift_AUC),
-
-      # Gene-Gene Interaction
-      lm_Score_L = first(lm_Score_L),
-      lm_Score_K = first(lm_Score_K),
-      lm_Score_r = first(lm_Score_r),
-      lm_Score_AUC = first(lm_Score_AUC),
-
-      # NG, DB, SM values
-      NG_sum_int = sum(NG),
-      DB_sum_int = sum(DB),
-      SM_sum_int = sum(SM),
-      .groups = "drop"
    ) %>%
    arrange(desc(Z_lm_L), desc(NG_sum_int))

@@ -446,9 +420,7 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
        Z_lm_L_adjusted = ifelse(is.na(Z_lm_L), 0.001, Z_lm_L),
        Z_lm_K_adjusted = ifelse(is.na(Z_lm_K), 0.001, Z_lm_K),
        Z_lm_r_adjusted = ifelse(is.na(Z_lm_r), 0.001, Z_lm_r),
-        Z_lm_AUC_adjusted = ifelse(is.na(Z_lm_AUC), 0.001, Z_lm_AUC)
-      ) %>%
-      mutate(
+        Z_lm_AUC_adjusted = ifelse(is.na(Z_lm_AUC), 0.001, Z_lm_AUC),
        Rank_L = rank(Avg_Zscore_L_adjusted),
        Rank_K = rank(Avg_Zscore_K_adjusted),
        Rank_r = rank(Avg_Zscore_r_adjusted),
@@ -456,13 +428,21 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
        Rank_lm_L = rank(Z_lm_L_adjusted),
        Rank_lm_K = rank(Z_lm_K_adjusted),
        Rank_lm_r = rank(Z_lm_r_adjusted),
-        Rank_lm_AUC = rank(Z_lm_AUC_adjusted)
-      ) %>%
-      mutate(
-        lm_R_squared_rank_L = summary(lm(Rank_lm_L ~ Rank_L, data = .))$r.squared,
-        lm_R_squared_rank_K = summary(lm(Rank_lm_K ~ Rank_K, data = .))$r.squared,
-        lm_R_squared_rank_r = summary(lm(Rank_lm_r ~ Rank_r, data = .))$r.squared,
-        lm_R_squared_rank_AUC = summary(lm(Rank_lm_AUC ~ Rank_AUC, data = .))$r.squared
+        Rank_lm_AUC = rank(Z_lm_AUC_adjusted),
+        Rank_lm_fit_L = list(perform_lm(Rank_lm_L, Rank_L, max_conc)), # own name so the numeric Rank_lm_* ranks above are not overwritten
+        Rank_lm_fit_K = list(perform_lm(Rank_lm_K, Rank_K, max_conc)),
+        Rank_lm_fit_r = list(perform_lm(Rank_lm_r, Rank_r, max_conc)),
+        Rank_lm_fit_AUC = list(perform_lm(Rank_lm_AUC, Rank_AUC, max_conc)),
+        Correlation_lm_L = list(perform_lm(Z_lm_L, Avg_Zscore_L, max_conc)),
+        Correlation_lm_K = list(perform_lm(Z_lm_K, Avg_Zscore_K, max_conc)),
+        Correlation_lm_r = list(perform_lm(Z_lm_r, Avg_Zscore_r, max_conc)),
+        Correlation_lm_AUC = list(perform_lm(Z_lm_AUC, Avg_Zscore_AUC, max_conc)),
+        Correlation_lm_K_L = list(perform_lm(Z_lm_K, Z_lm_L, max_conc)),
+        Correlation_lm_r_L = list(perform_lm(Z_lm_r, Z_lm_L, max_conc)),
+        Correlation_lm_AUC_L = list(perform_lm(Z_lm_AUC, Z_lm_L, max_conc)),
+        Correlation_lm_r_K = list(perform_lm(Z_lm_r, Z_lm_K, max_conc)),
+        Correlation_lm_AUC_K = list(perform_lm(Z_lm_AUC, Z_lm_K, max_conc)),
+        Correlation_lm_AUC_r = list(perform_lm(Z_lm_AUC, Z_lm_r, max_conc))
      )

    # Add overlap threshold categories based on Z-lm and Avg-Z scores
@@ -480,122 +460,101 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
          Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Zscore",
          TRUE ~ "No Effect"
        ),
-        # Apply the perform_lm function for each variable pair
-        lm_L = list(perform_lm(Z_lm_L, Avg_Zscore_L, max_conc)),
-        lm_K = list(perform_lm(Z_lm_K, Avg_Zscore_K, max_conc)),
-        lm_r = list(perform_lm(Z_lm_r, Avg_Zscore_r, max_conc)),
-        lm_AUC = list(perform_lm(Z_lm_AUC, Avg_Zscore_AUC, max_conc)),

-        # Correlation models for various pairs
-        Z_lm_K_L = list(perform_lm(Z_lm_K, Z_lm_L, max_conc)),
-        Z_lm_r_L = list(perform_lm(Z_lm_r, Z_lm_L, max_conc)),
-        Z_lm_AUC_L = list(perform_lm(Z_lm_AUC, Z_lm_L, max_conc)),
-        Z_lm_r_K = list(perform_lm(Z_lm_r, Z_lm_K, max_conc)),
-        Z_lm_AUC_K = list(perform_lm(Z_lm_AUC, Z_lm_K, max_conc)),
-        Z_lm_AUC_r = list(perform_lm(Z_lm_AUC, Z_lm_r, max_conc)),
+        Rank_lm_R_squared_L = Rank_lm_fit_L[[1]]$r_squared, # R-squared of each rank model, read from its own fit
+        Rank_lm_R_squared_K = Rank_lm_fit_K[[1]]$r_squared, # previously read the L model by mistake
+        Rank_lm_R_squared_r = Rank_lm_fit_r[[1]]$r_squared,
+        Rank_lm_R_squared_AUC = Rank_lm_fit_AUC[[1]]$r_squared,
+        Correlation_lm_intercept_L = Correlation_lm_L[[1]]$intercept,
+        Correlation_lm_slope_L = Correlation_lm_L[[1]]$slope,
+        Correlation_lm_R_squared_L = Correlation_lm_L[[1]]$r_squared, # lower-case "squared" matches the any_of() select for df_interactions
+        Correlation_lm_Score_L = Correlation_lm_L[[1]]$score,
+        Correlation_lm_intercept_K = Correlation_lm_K[[1]]$intercept,
+        Correlation_lm_slope_K = Correlation_lm_K[[1]]$slope,
+        Correlation_lm_R_squared_K = Correlation_lm_K[[1]]$r_squared,
+        Correlation_lm_Score_K = Correlation_lm_K[[1]]$score,
+        Correlation_lm_intercept_r = Correlation_lm_r[[1]]$intercept,
+        Correlation_lm_slope_r = Correlation_lm_r[[1]]$slope,
+        Correlation_lm_R_squared_r = Correlation_lm_r[[1]]$r_squared,
+        Correlation_lm_Score_r = Correlation_lm_r[[1]]$score,
+        Correlation_lm_intercept_AUC = Correlation_lm_AUC[[1]]$intercept,
+        Correlation_lm_slope_AUC = Correlation_lm_AUC[[1]]$slope,
+        Correlation_lm_R_squared_AUC = Correlation_lm_AUC[[1]]$r_squared,
+        Correlation_lm_Score_AUC = Correlation_lm_AUC[[1]]$score,
+        Correlation_lm_intercept_K_L = Correlation_lm_K_L[[1]]$intercept,
+        Correlation_lm_slope_K_L = Correlation_lm_K_L[[1]]$slope,
+        Correlation_lm_R_squared_K_L = Correlation_lm_K_L[[1]]$r_squared,
+        Correlation_lm_Score_K_L = Correlation_lm_K_L[[1]]$score,
+        Correlation_lm_intercept_r_L = Correlation_lm_r_L[[1]]$intercept,
+        Correlation_lm_slope_r_L = Correlation_lm_r_L[[1]]$slope,
+        Correlation_lm_R_squared_r_L = Correlation_lm_r_L[[1]]$r_squared,
+        Correlation_lm_Score_r_L = Correlation_lm_r_L[[1]]$score,
+        Correlation_lm_intercept_AUC_L = Correlation_lm_AUC_L[[1]]$intercept,
+        Correlation_lm_slope_AUC_L = Correlation_lm_AUC_L[[1]]$slope,
+        Correlation_lm_R_squared_AUC_L = Correlation_lm_AUC_L[[1]]$r_squared,
+        Correlation_lm_Score_AUC_L = Correlation_lm_AUC_L[[1]]$score,
+        Correlation_lm_intercept_r_K = Correlation_lm_r_K[[1]]$intercept,
+        Correlation_lm_slope_r_K = Correlation_lm_r_K[[1]]$slope,
+        Correlation_lm_R_squared_r_K = Correlation_lm_r_K[[1]]$r_squared,
+        Correlation_lm_Score_r_K = Correlation_lm_r_K[[1]]$score,
+        Correlation_lm_intercept_AUC_K = Correlation_lm_AUC_K[[1]]$intercept,
+        Correlation_lm_slope_AUC_K = Correlation_lm_AUC_K[[1]]$slope,
+        Correlation_lm_R_squared_AUC_K = Correlation_lm_AUC_K[[1]]$r_squared,
+        Correlation_lm_Score_AUC_K = Correlation_lm_AUC_K[[1]]$score,
+        Correlation_lm_intercept_AUC_r = Correlation_lm_AUC_r[[1]]$intercept,
+        Correlation_lm_slope_AUC_r = Correlation_lm_AUC_r[[1]]$slope,
+        Correlation_lm_R_squared_AUC_r = Correlation_lm_AUC_r[[1]]$r_squared,
+        Correlation_lm_Score_AUC_r = Correlation_lm_AUC_r[[1]]$score
+      )
+  }

-        # Extract coefficients and statistics for each model
-        lm_rank_intercept_L = lm_L[[1]]$intercept,
-        lm_rank_slope_L = lm_L[[1]]$slope,
-        R_Squared_L = lm_L[[1]]$r_squared,
-        lm_Score_L = lm_L[[1]]$score,
-
-        lm_intercept_K = lm_K[[1]]$intercept,
-        lm_slope_K = lm_K[[1]]$slope,
-        R_Squared_K = lm_K[[1]]$r_squared,
-        lm_Score_K = lm_K[[1]]$score,
-
-        lm_intercept_r = lm_r[[1]]$intercept,
-        lm_slope_r = lm_r[[1]]$slope,
-        R_Squared_r = lm_r[[1]]$r_squared,
-        lm_Score_r = lm_r[[1]]$score,
-
-        lm_intercept_AUC = lm_AUC[[1]]$intercept,
-        lm_slope_AUC = lm_AUC[[1]]$slope,
-        R_Squared_AUC = lm_AUC[[1]]$r_squared,
-        lm_Score_AUC = lm_AUC[[1]]$score,
-
-        Z_lm_intercept_K_L = Z_lm_K_L[[1]]$intercept,
-        Z_lm_slope_K_L = Z_lm_K_L[[1]]$slope,
-        Z_lm_R_squared_K_L = Z_lm_K_L[[1]]$r_squared,
-        Z_lm_Score_K_L = Z_lm_K_L[[1]]$score,
-
-        Z_lm_intercept_r_L = Z_lm_r_L[[1]]$intercept,
-        Z_lm_slope_r_L = Z_lm_r_L[[1]]$slope,
-        Z_lm_R_squared_r_L = Z_lm_r_L[[1]]$r_squared,
-        Z_lm_Score_r_L = Z_lm_r_L[[1]]$score,
-
-        Z_lm_intercept_AUC_L = Z_lm_AUC_L[[1]]$intercept,
-        Z_lm_slope_AUC_L = Z_lm_AUC_L[[1]]$slope,
-        Z_lm_R_squared_AUC_L = Z_lm_AUC_L[[1]]$r_squared,
-        Z_lm_Score_AUC_L = Z_lm_AUC_L[[1]]$score,
-
-        Z_lm_intercept_r_K = Z_lm_r_K[[1]]$intercept,
-        Z_lm_slope_r_K = Z_lm_r_K[[1]]$slope,
-        Z_lm_R_squared_r_K = Z_lm_r_K[[1]]$r_squared,
-        Z_lm_Score_r_K = Z_lm_r_K[[1]]$score,
-
-        Z_lm_intercept_AUC_K = Z_lm_AUC_K[[1]]$intercept,
-        Z_lm_slope_AUC_K = Z_lm_AUC_K[[1]]$slope,
-        Z_lm_R_squared_AUC_K = Z_lm_AUC_K[[1]]$r_squared,
-        Z_lm_Score_AUC_K = Z_lm_AUC_K[[1]]$score,
-
-        Z_lm_intercept_AUC_r = Z_lm_AUC_r[[1]]$intercept,
-        Z_lm_slope_AUC_r = Z_lm_AUC_r[[1]]$slope,
-        Z_lm_R_squared_AUC_r = Z_lm_AUC_r[[1]]$r_squared,
-        Z_lm_Score_AUC_r = Z_lm_AUC_r[[1]]$score
-      ) %>%
-      select(
-        -lm_L, -lm_K, -lm_r, -lm_AUC,
-        -Z_lm_K_L, -Z_lm_r_L, -Z_lm_AUC_L, -Z_lm_r_K, -Z_lm_AUC_K, -Z_lm_AUC_r)
-  } # end deletion-specific block
-
-  # Create the final calculations and interactions dataframes with only required columns for csv output
+  # Create the final calculations and interactions dataframes with specific columns for csv output
+  # Trying to mimic original output data
  df_calculations <- calculations %>%
-    select(
-      all_of(group_vars),
-      conc_num, conc_num_factor, conc_num_factor_factor, N,
-      mean_L, median_L, sd_L, se_L,
-      mean_K, median_K, sd_K, se_K,
-      mean_r, median_r, sd_r, se_r,
-      mean_AUC, median_AUC, sd_AUC, se_AUC,
-      Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC,
-      Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC,
-      WT_L, WT_K, WT_r, WT_AUC,
-      WT_sd_L, WT_sd_K, WT_sd_r, WT_sd_AUC,
-      Exp_L, Exp_K, Exp_r, Exp_AUC,
-      Delta_L, Delta_K, Delta_r, Delta_AUC,
-      mean_Delta_L, mean_Delta_K, mean_Delta_r, mean_Delta_AUC,
-      Zscore_L, Zscore_K, Zscore_r, Zscore_AUC,
-      NG_sum, DB_sum, SM_sum
-    ) %>%
-      rename(NG = NG_sum, DB = DB_sum, SM = SM_sum)
+    select(all_of(c(
+      group_vars, # necessary for full_data left_join
+      "conc_num", "conc_num_factor", "conc_num_factor_factor", "N",
+      "mean_L", "median_L", "sd_L", "se_L",
+      "mean_K", "median_K", "sd_K", "se_K",
+      "mean_r", "median_r", "sd_r", "se_r",
+      "mean_AUC", "median_AUC", "sd_AUC", "se_AUC",
+      "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
+      "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
+      "WT_L", "WT_K", "WT_r", "WT_AUC",
+      "WT_sd_L", "WT_sd_K", "WT_sd_r", "WT_sd_AUC",
+      "Exp_L", "Exp_K", "Exp_r", "Exp_AUC",
+      "Delta_L", "Delta_K", "Delta_r", "Delta_AUC",
+      "mean_Delta_L", "mean_Delta_K", "mean_Delta_r", "mean_Delta_AUC",
+      "Zscore_L", "Zscore_K", "Zscore_r", "Zscore_AUC",
+      "NG_sum", "DB_sum", "SM_sum"
+    ))) %>%
+    rename(NG = NG_sum, DB = DB_sum, SM = SM_sum)

  df_interactions <- interactions %>%
-    select(
-      any_of(c(
-        group_vars,
-        "Avg_Zscore_L", "Avg_Zscore_K", "Avg_Zscore_r", "Avg_Zscore_AUC",
-        "Sum_Z_Score_L", "Sum_Z_Score_K", "Sum_Z_Score_r", "Sum_Z_Score_AUC",
-        "Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC",
-        "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
-        "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
-        "lm_Score_L", "lm_Score_K", "lm_Score_r", "lm_Score_AUC",
-        "R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC",
-        "NG_sum_int", "DB_sum_int", "SM_sum_int",
-        "Z_lm_intercept_L", "Z_lm_slope_L", "Z_lm_R_squared_L", "Z_lm_Score_L",
-        "Z_lm_intercept_K", "Z_lm_slope_K", "Z_lm_R_squared_K", "Z_lm_Score_K",
-        "Z_lm_intercept_r", "Z_lm_slope_r", "Z_lm_R_squared_r", "Z_lm_Score_r",
-        "Z_lm_intercept_AUC", "Z_lm_slope_AUC", "Z_lm_R_squared_AUC", "Z_lm_Score_AUC",
-        "Z_lm_intercept_K_L", "Z_lm_slope_K_L", "Z_lm_R_squared_K_L", "Z_lm_Score_K_L",
-        "Z_lm_intercept_r_L", "Z_lm_slope_r_L", "Z_lm_R_squared_r_L", "Z_lm_Score_r_L",
-        "Z_lm_intercept_AUC_L", "Z_lm_slope_AUC_L", "Z_lm_R_squared_AUC_L", "Z_lm_Score_AUC_L",
-        "Z_lm_intercept_r_K", "Z_lm_slope_r_K", "Z_lm_R_squared_r_K", "Z_lm_Score_r_K",
-        "Z_lm_intercept_AUC_K", "Z_lm_slope_AUC_K", "Z_lm_R_squared_AUC_K", "Z_lm_Score_AUC_K",
-        "Z_lm_intercept_AUC_r", "Z_lm_slope_AUC_r", "Z_lm_R_squared_AUC_r", "Z_lm_Score_AUC_r"
-      ))
-    ) %>%
-      rename(NG = NG_sum_int, DB = DB_sum_int, SM = SM_sum_int)
+    select(any_of(c(
+      group_vars, # necessary for full_data left_join
+      "Avg_Zscore_L", "Avg_Zscore_K", "Avg_Zscore_r", "Avg_Zscore_AUC",
+      "Sum_Z_Score_L", "Sum_Z_Score_K", "Sum_Z_Score_r", "Sum_Z_Score_AUC",
+      "Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC",
+      "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
+      "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
+      "lm_Score_L", "lm_Score_K", "lm_Score_r", "lm_Score_AUC",
+      "R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC",
+      "NG_sum_int", "DB_sum_int", "SM_sum_int",
+      "Rank_lm_R_squared_L", "Rank_lm_R_squared_K", "Rank_lm_R_squared_r", "Rank_lm_R_squared_AUC",
+      "Correlation_lm_intercept_L", "Correlation_lm_slope_L", "Correlation_lm_R_squared_L", "Correlation_lm_Score_L",
+      "Correlation_lm_intercept_K", "Correlation_lm_slope_K", "Correlation_lm_R_squared_K", "Correlation_lm_Score_K",
+      "Correlation_lm_intercept_r", "Correlation_lm_slope_r", "Correlation_lm_R_squared_r", "Correlation_lm_Score_r",
+      "Correlation_lm_intercept_AUC", "Correlation_lm_slope_AUC", "Correlation_lm_R_squared_AUC", "Correlation_lm_Score_AUC",
+      "Correlation_lm_intercept_K_L", "Correlation_lm_slope_K_L", "Correlation_lm_R_squared_K_L", "Correlation_lm_Score_K_L",
+      "Correlation_lm_intercept_r_L", "Correlation_lm_slope_r_L", "Correlation_lm_R_squared_r_L", "Correlation_lm_Score_r_L",
+      "Correlation_lm_intercept_AUC_L", "Correlation_lm_slope_AUC_L", "Correlation_lm_R_squared_AUC_L", "Correlation_lm_Score_AUC_L",
+      "Correlation_lm_intercept_r_K", "Correlation_lm_slope_r_K", "Correlation_lm_R_squared_r_K", "Correlation_lm_Score_r_K",
+      "Correlation_lm_intercept_AUC_K", "Correlation_lm_slope_AUC_K", "Correlation_lm_R_squared_AUC_K", "Correlation_lm_Score_AUC_K",
+      "Correlation_lm_intercept_AUC_r", "Correlation_lm_slope_AUC_r", "Correlation_lm_R_squared_AUC_r", "Correlation_lm_Score_AUC_r",
+      "Overlap"
+    ))) %>%
+  rename(NG = NG_sum_int, DB = DB_sum_int, SM = SM_sum_int)

  # Avoid column collision on left join for overlapping variables
  calculations_no_overlap <- calculations %>%
@@ -1306,12 +1265,11 @@ generate_correlation_plot_configs <- function(df, df_reference) {
      x_var <- paste0("Z_lm_", rel$x)
      y_var <- paste0("Z_lm_", rel$y)
      
-      intercept <- df[[paste0("Z_lm_intercept_", rel$y, "_", rel$x)]][1]
-      slope <- df[[paste0("Z_lm_slope_", rel$y, "_", rel$x)]][1]
-      r_squared <- df[[paste0("Z_lm_R_squared_", rel$y, "_", rel$x)]][1]
+      intercept <- df[[paste0("Correlation_lm_intercept_", rel$y, "_", rel$x)]][1]
+      slope <- df[[paste0("Correlation_lm_slope_", rel$y, "_", rel$x)]][1]
+      r_squared <- df[[paste0("Correlation_lm_R_squared_", rel$y, "_", rel$x)]][1]
      r_squared_rounded <- round(r_squared, 4)
      r_squared_label <- paste("R-squared =", r_squared_rounded)
-
      xmin <- min(c(min(df[[x_var]]), min(df_reference[[x_var]])))
      xmax <- max(c(max(df[[x_var]]), max(df_reference[[x_var]])))