Reorganize calculations to prevent column clobbering

This commit is contained in:
2024-10-07 18:05:23 -04:00
parent a23565fad4
commit aef0fba1da

View File

@@ -265,7 +265,7 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
df <- df %>%
left_join(mean_zeroes, by = c(group_vars))
# Calculate Raw Shifts and Z Shifts
# Calculate Raw Shifts and Z Shifts for all rows
df <- df %>%
mutate(
Raw_Shift_L = mean_L_zero - WT_L,
@@ -312,39 +312,6 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
) %>%
ungroup()
calculations <- calculations %>%
group_by(across(all_of(group_vars))) %>%
mutate(
# Apply the simple LM function for each variable
lm_L = list(perform_lm(Delta_L, conc_num_factor, max_conc)),
lm_K = list(perform_lm(Delta_K, conc_num_factor, max_conc)),
lm_r = list(perform_lm(Delta_r, conc_num_factor, max_conc)),
lm_AUC = list(perform_lm(Delta_AUC, conc_num_factor, max_conc)),
# Extract coefficients and statistics for each model
lm_intercept_L = lm_L[[1]]$intercept,
lm_slope_L = lm_L[[1]]$slope,
R_Squared_L = lm_L[[1]]$r_squared,
lm_Score_L = lm_L[[1]]$score,
lm_intercept_K = lm_K[[1]]$intercept,
lm_slope_K = lm_K[[1]]$slope,
R_Squared_K = lm_K[[1]]$r_squared,
lm_Score_K = lm_K[[1]]$score,
lm_intercept_r = lm_r[[1]]$intercept,
lm_slope_r = lm_r[[1]]$slope,
R_Squared_r = lm_r[[1]]$r_squared,
lm_Score_r = lm_r[[1]]$score,
lm_intercept_AUC = lm_AUC[[1]]$intercept,
lm_slope_AUC = lm_AUC[[1]]$slope,
R_Squared_AUC = lm_AUC[[1]]$r_squared,
lm_Score_AUC = lm_AUC[[1]]$score
) %>%
select(-lm_L, -lm_K, -lm_r, -lm_AUC) %>%
ungroup()
# For interaction plot error bars
delta_means_sds <- calculations %>%
group_by(across(all_of(group_vars))) %>%
@@ -363,8 +330,69 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
calculations <- calculations %>%
left_join(delta_means_sds, by = group_vars)
# Calculate group-specific interactions
# Collapses `calculations` to one row per `group_vars` combination and stores
# the result in `interactions`: sums of the NG/DB/SM columns, the first
# Raw/Z shift value of each group, summed and averaged Z-scores, and the
# intercept/slope/r_squared/score returned by perform_lm() for each of
# L, K, r and AUC.
interactions <- calculations %>%
group_by(across(all_of(group_vars))) %>%
summarise(
# Sums without na.rm: a single NA in the group makes the result NA
NG_sum_int = sum(NG),
DB_sum_int = sum(DB),
SM_sum_int = sum(SM),
# NOTE(review): total_conc_num is not defined in this block. If it is a
# per-row column rather than a scalar, this expression is not length 1 per
# group (the first() calls below suggest that may be the case) - confirm.
num_non_removed_concs = total_conc_num - sum(DB, na.rm = TRUE) - 1,
# Add background data
# first() takes the value from the first row of each group
Raw_Shift_L = first(Raw_Shift_L),
Raw_Shift_K = first(Raw_Shift_K),
Raw_Shift_r = first(Raw_Shift_r),
Raw_Shift_AUC = first(Raw_Shift_AUC),
Z_Shift_L = first(Z_Shift_L),
Z_Shift_K = first(Z_Shift_K),
Z_Shift_r = first(Z_Shift_r),
Z_Shift_AUC = first(Z_Shift_AUC),
# Sum Z-scores
Sum_Z_Score_L = sum(Zscore_L, na.rm = TRUE),
Sum_Z_Score_K = sum(Zscore_K, na.rm = TRUE),
Sum_Z_Score_r = sum(Zscore_r, na.rm = TRUE),
Sum_Z_Score_AUC = sum(Zscore_AUC, na.rm = TRUE),
# Average Z-scores: the sums are recomputed here instead of reusing the
# Sum_Z_Score_* values defined just above.
# TODO should we use mean() here, not sure
# NOTE(review): L and K divide by num_non_removed_concs while r and AUC
# divide by total_conc_num - 1 - confirm this asymmetry is intentional.
Avg_Zscore_L = sum(Zscore_L, na.rm = TRUE) / first(num_non_removed_concs),
Avg_Zscore_K = sum(Zscore_K, na.rm = TRUE) / first(num_non_removed_concs),
Avg_Zscore_r = sum(Zscore_r, na.rm = TRUE) / first(total_conc_num - 1),
Avg_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE) / first(total_conc_num - 1),
# Perform gene-gene linear modeling
# perform_lm() is defined elsewhere; each result is wrapped in list() so it
# can be held as a single list-column entry per group.
lm_L = list(perform_lm(Delta_L, conc_num_factor, max_conc)),
lm_K = list(perform_lm(Delta_K, conc_num_factor, max_conc)),
lm_r = list(perform_lm(Delta_r, conc_num_factor, max_conc)),
lm_AUC = list(perform_lm(Delta_AUC, conc_num_factor, max_conc)),
# Extract coefficients and statistics for each model
# [[1]] unwraps the list() wrapper added above
lm_intercept_L = lm_L[[1]]$intercept,
lm_slope_L = lm_L[[1]]$slope,
R_Squared_L = lm_L[[1]]$r_squared,
lm_Score_L = lm_L[[1]]$score,
lm_intercept_K = lm_K[[1]]$intercept,
lm_slope_K = lm_K[[1]]$slope,
R_Squared_K = lm_K[[1]]$r_squared,
lm_Score_K = lm_K[[1]]$score,
lm_intercept_r = lm_r[[1]]$intercept,
lm_slope_r = lm_r[[1]]$slope,
R_Squared_r = lm_r[[1]]$r_squared,
lm_Score_r = lm_r[[1]]$score,
lm_intercept_AUC = lm_AUC[[1]]$intercept,
lm_slope_AUC = lm_AUC[[1]]$slope,
R_Squared_AUC = lm_AUC[[1]]$r_squared,
lm_Score_AUC = lm_AUC[[1]]$score,
.groups = "drop"
) %>%
select(-c(lm_L, lm_K, lm_r, lm_AUC)) # drop linear models since we have coefficients
# Summary statistics for lm scores
calculations <- calculations %>%
interactions <- interactions %>%
# group_by(across(all_of(group_vars))) %>%
mutate(
lm_mean_L = mean(lm_Score_L, na.rm = TRUE),
lm_sd_L = sd(lm_Score_L, na.rm = TRUE),
@@ -378,60 +406,6 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
Z_lm_K = (lm_Score_K - lm_mean_K) / lm_sd_K,
Z_lm_r = (lm_Score_r - lm_mean_r) / lm_sd_r,
Z_lm_AUC = (lm_Score_AUC - lm_mean_AUC) / lm_sd_AUC
)
# Build summary stats (interactions)
interactions <- calculations %>%
group_by(across(all_of(group_vars))) %>%
summarise(
num_non_removed_concs = total_conc_num - sum(DB, na.rm = TRUE) - 1,
Sum_Z_Score_L = sum(Zscore_L, na.rm = TRUE),
Sum_Z_Score_K = sum(Zscore_K, na.rm = TRUE),
Sum_Z_Score_r = sum(Zscore_r, na.rm = TRUE),
Sum_Z_Score_AUC = sum(Zscore_AUC, na.rm = TRUE),
Avg_Zscore_L = Sum_Z_Score_L / first(num_non_removed_concs),
Avg_Zscore_K = Sum_Z_Score_K / first(num_non_removed_concs),
Avg_Zscore_r = Sum_Z_Score_r / first(num_non_removed_concs),
Avg_Zscore_AUC = Sum_Z_Score_AUC / first(num_non_removed_concs),
# R_Squared
R_Squared_L = first(R_Squared_L),
R_Squared_K = first(R_Squared_K),
R_Squared_r = first(R_Squared_r),
R_Squared_AUC = first(R_Squared_AUC),
# Interaction Z-scores
Z_lm_L = first(Z_lm_L),
Z_lm_K = first(Z_lm_K),
Z_lm_r = first(Z_lm_r),
Z_lm_AUC = first(Z_lm_AUC),
# Raw Shifts
Raw_Shift_L = first(Raw_Shift_L),
Raw_Shift_K = first(Raw_Shift_K),
Raw_Shift_r = first(Raw_Shift_r),
Raw_Shift_AUC = first(Raw_Shift_AUC),
# Z Shifts
Z_Shift_L = first(Z_Shift_L),
Z_Shift_K = first(Z_Shift_K),
Z_Shift_r = first(Z_Shift_r),
Z_Shift_AUC = first(Z_Shift_AUC),
# Gene-Gene Interaction
lm_Score_L = first(lm_Score_L),
lm_Score_K = first(lm_Score_K),
lm_Score_r = first(lm_Score_r),
lm_Score_AUC = first(lm_Score_AUC),
# NG, DB, SM values
NG_sum_int = sum(NG),
DB_sum_int = sum(DB),
SM_sum_int = sum(SM),
.groups = "drop"
) %>%
arrange(desc(Z_lm_L), desc(NG_sum_int))
@@ -446,9 +420,7 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
Z_lm_L_adjusted = ifelse(is.na(Z_lm_L), 0.001, Z_lm_L),
Z_lm_K_adjusted = ifelse(is.na(Z_lm_K), 0.001, Z_lm_K),
Z_lm_r_adjusted = ifelse(is.na(Z_lm_r), 0.001, Z_lm_r),
Z_lm_AUC_adjusted = ifelse(is.na(Z_lm_AUC), 0.001, Z_lm_AUC)
) %>%
mutate(
Z_lm_AUC_adjusted = ifelse(is.na(Z_lm_AUC), 0.001, Z_lm_AUC),
Rank_L = rank(Avg_Zscore_L_adjusted),
Rank_K = rank(Avg_Zscore_K_adjusted),
Rank_r = rank(Avg_Zscore_r_adjusted),
@@ -456,13 +428,21 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
Rank_lm_L = rank(Z_lm_L_adjusted),
Rank_lm_K = rank(Z_lm_K_adjusted),
Rank_lm_r = rank(Z_lm_r_adjusted),
Rank_lm_AUC = rank(Z_lm_AUC_adjusted)
) %>%
mutate(
lm_R_squared_rank_L = summary(lm(Rank_lm_L ~ Rank_L, data = .))$r.squared,
lm_R_squared_rank_K = summary(lm(Rank_lm_K ~ Rank_K, data = .))$r.squared,
lm_R_squared_rank_r = summary(lm(Rank_lm_r ~ Rank_r, data = .))$r.squared,
lm_R_squared_rank_AUC = summary(lm(Rank_lm_AUC ~ Rank_AUC, data = .))$r.squared
Rank_lm_AUC = rank(Z_lm_AUC_adjusted),
Rank_lm_L = list(perform_lm(Rank_lm_L, Rank_L, max_conc)),
Rank_lm_K = list(perform_lm(Rank_lm_K, Rank_K, max_conc)),
Rank_lm_r = list(perform_lm(Rank_lm_r, Rank_r, max_conc)),
Rank_lm_AUC = list(perform_lm(Rank_lm_AUC, Rank_AUC, max_conc)),
Correlation_lm_L = list(perform_lm(Z_lm_L, Avg_Zscore_L, max_conc)),
Correlation_lm_K = list(perform_lm(Z_lm_K, Avg_Zscore_K, max_conc)),
Correlation_lm_r = list(perform_lm(Z_lm_r, Avg_Zscore_r, max_conc)),
Correlation_lm_AUC = list(perform_lm(Z_lm_AUC, Avg_Zscore_AUC, max_conc)),
Correlation_lm_K_L = list(perform_lm(Z_lm_K, Z_lm_L, max_conc)),
Correlation_lm_r_L = list(perform_lm(Z_lm_r, Z_lm_L, max_conc)),
Correlation_lm_AUC_L = list(perform_lm(Z_lm_AUC, Z_lm_L, max_conc)),
Correlation_lm_r_K = list(perform_lm(Z_lm_r, Z_lm_K, max_conc)),
Correlation_lm_AUC_K = list(perform_lm(Z_lm_AUC, Z_lm_K, max_conc)),
Correlation_lm_AUC_r = list(perform_lm(Z_lm_AUC, Z_lm_r, max_conc))
)
# Add overlap threshold categories based on Z-lm and Avg-Z scores
@@ -480,122 +460,101 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2)
Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Zscore",
TRUE ~ "No Effect"
),
# Apply the perform_lm function for each variable pair
lm_L = list(perform_lm(Z_lm_L, Avg_Zscore_L, max_conc)),
lm_K = list(perform_lm(Z_lm_K, Avg_Zscore_K, max_conc)),
lm_r = list(perform_lm(Z_lm_r, Avg_Zscore_r, max_conc)),
lm_AUC = list(perform_lm(Z_lm_AUC, Avg_Zscore_AUC, max_conc)),
# Correlation models for various pairs
Z_lm_K_L = list(perform_lm(Z_lm_K, Z_lm_L, max_conc)),
Z_lm_r_L = list(perform_lm(Z_lm_r, Z_lm_L, max_conc)),
Z_lm_AUC_L = list(perform_lm(Z_lm_AUC, Z_lm_L, max_conc)),
Z_lm_r_K = list(perform_lm(Z_lm_r, Z_lm_K, max_conc)),
Z_lm_AUC_K = list(perform_lm(Z_lm_AUC, Z_lm_K, max_conc)),
Z_lm_AUC_r = list(perform_lm(Z_lm_AUC, Z_lm_r, max_conc)),
# R-squared of each rank-vs-rank model, one per measure. Each value must be
# read from its own model: the K entry previously read Rank_lm_L, so it
# duplicated the L value.
# NOTE(review): the Rank_lm_* columns read here are the list-wrapped
# perform_lm() results assigned earlier in this function, which replace the
# numeric rank(Z_lm_*_adjusted) columns of the same name - confirm nothing
# downstream still needs those numeric ranks.
Rank_lm_R_squared_L = Rank_lm_L[[1]]$r_squared,
Rank_lm_R_squared_K = Rank_lm_K[[1]]$r_squared,
Rank_lm_R_squared_r = Rank_lm_r[[1]]$r_squared,
Rank_lm_R_squared_AUC = Rank_lm_AUC[[1]]$r_squared,
# Intercept, slope, R-squared and score of the Z_lm vs Avg_Zscore model for
# each measure, unwrapped from the list-wrapped perform_lm() results.
# The R-squared columns are spelled `R_squared` (lowercase "s") to match the
# names requested by the df_interactions select(any_of(...)) list and the
# pairwise Correlation_lm_R_squared_* columns; any_of() silently skips names
# that do not exist, so the former `R_Squared` spelling dropped these values
# from the output.
Correlation_lm_intercept_L = Correlation_lm_L[[1]]$intercept,
Correlation_lm_slope_L = Correlation_lm_L[[1]]$slope,
Correlation_lm_R_squared_L = Correlation_lm_L[[1]]$r_squared,
Correlation_lm_Score_L = Correlation_lm_L[[1]]$score,
Correlation_lm_intercept_K = Correlation_lm_K[[1]]$intercept,
Correlation_lm_slope_K = Correlation_lm_K[[1]]$slope,
Correlation_lm_R_squared_K = Correlation_lm_K[[1]]$r_squared,
Correlation_lm_Score_K = Correlation_lm_K[[1]]$score,
Correlation_lm_intercept_r = Correlation_lm_r[[1]]$intercept,
Correlation_lm_slope_r = Correlation_lm_r[[1]]$slope,
Correlation_lm_R_squared_r = Correlation_lm_r[[1]]$r_squared,
Correlation_lm_Score_r = Correlation_lm_r[[1]]$score,
Correlation_lm_intercept_AUC = Correlation_lm_AUC[[1]]$intercept,
Correlation_lm_slope_AUC = Correlation_lm_AUC[[1]]$slope,
Correlation_lm_R_squared_AUC = Correlation_lm_AUC[[1]]$r_squared,
Correlation_lm_Score_AUC = Correlation_lm_AUC[[1]]$score,
Correlation_lm_intercept_K_L = Correlation_lm_K_L[[1]]$intercept,
Correlation_lm_slope_K_L = Correlation_lm_K_L[[1]]$slope,
Correlation_lm_R_squared_K_L = Correlation_lm_K_L[[1]]$r_squared,
Correlation_lm_Score_K_L = Correlation_lm_K_L[[1]]$score,
Correlation_lm_intercept_r_L = Correlation_lm_r_L[[1]]$intercept,
Correlation_lm_slope_r_L = Correlation_lm_r_L[[1]]$slope,
Correlation_lm_R_squared_r_L = Correlation_lm_r_L[[1]]$r_squared,
Correlation_lm_Score_r_L = Correlation_lm_r_L[[1]]$score,
Correlation_lm_intercept_AUC_L = Correlation_lm_AUC_L[[1]]$intercept,
Correlation_lm_slope_AUC_L = Correlation_lm_AUC_L[[1]]$slope,
Correlation_lm_R_squared_AUC_L = Correlation_lm_AUC_L[[1]]$r_squared,
Correlation_lm_Score_AUC_L = Correlation_lm_AUC_L[[1]]$score,
Correlation_lm_intercept_r_K = Correlation_lm_r_K[[1]]$intercept,
Correlation_lm_slope_r_K = Correlation_lm_r_K[[1]]$slope,
Correlation_lm_R_squared_r_K = Correlation_lm_r_K[[1]]$r_squared,
Correlation_lm_Score_r_K = Correlation_lm_r_K[[1]]$score,
Correlation_lm_intercept_AUC_K = Correlation_lm_AUC_K[[1]]$intercept,
Correlation_lm_slope_AUC_K = Correlation_lm_AUC_K[[1]]$slope,
Correlation_lm_R_squared_AUC_K = Correlation_lm_AUC_K[[1]]$r_squared,
Correlation_lm_Score_AUC_K = Correlation_lm_AUC_K[[1]]$score,
Correlation_lm_intercept_AUC_r = Correlation_lm_AUC_r[[1]]$intercept,
Correlation_lm_slope_AUC_r = Correlation_lm_AUC_r[[1]]$slope,
Correlation_lm_R_squared_AUC_r = Correlation_lm_AUC_r[[1]]$r_squared,
Correlation_lm_Score_AUC_r = Correlation_lm_AUC_r[[1]]$score
)
}
# Extract coefficients and statistics for each model
lm_rank_intercept_L = lm_L[[1]]$intercept,
lm_rank_slope_L = lm_L[[1]]$slope,
R_Squared_L = lm_L[[1]]$r_squared,
lm_Score_L = lm_L[[1]]$score,
lm_intercept_K = lm_K[[1]]$intercept,
lm_slope_K = lm_K[[1]]$slope,
R_Squared_K = lm_K[[1]]$r_squared,
lm_Score_K = lm_K[[1]]$score,
lm_intercept_r = lm_r[[1]]$intercept,
lm_slope_r = lm_r[[1]]$slope,
R_Squared_r = lm_r[[1]]$r_squared,
lm_Score_r = lm_r[[1]]$score,
lm_intercept_AUC = lm_AUC[[1]]$intercept,
lm_slope_AUC = lm_AUC[[1]]$slope,
R_Squared_AUC = lm_AUC[[1]]$r_squared,
lm_Score_AUC = lm_AUC[[1]]$score,
Z_lm_intercept_K_L = Z_lm_K_L[[1]]$intercept,
Z_lm_slope_K_L = Z_lm_K_L[[1]]$slope,
Z_lm_R_squared_K_L = Z_lm_K_L[[1]]$r_squared,
Z_lm_Score_K_L = Z_lm_K_L[[1]]$score,
Z_lm_intercept_r_L = Z_lm_r_L[[1]]$intercept,
Z_lm_slope_r_L = Z_lm_r_L[[1]]$slope,
Z_lm_R_squared_r_L = Z_lm_r_L[[1]]$r_squared,
Z_lm_Score_r_L = Z_lm_r_L[[1]]$score,
Z_lm_intercept_AUC_L = Z_lm_AUC_L[[1]]$intercept,
Z_lm_slope_AUC_L = Z_lm_AUC_L[[1]]$slope,
Z_lm_R_squared_AUC_L = Z_lm_AUC_L[[1]]$r_squared,
Z_lm_Score_AUC_L = Z_lm_AUC_L[[1]]$score,
Z_lm_intercept_r_K = Z_lm_r_K[[1]]$intercept,
Z_lm_slope_r_K = Z_lm_r_K[[1]]$slope,
Z_lm_R_squared_r_K = Z_lm_r_K[[1]]$r_squared,
Z_lm_Score_r_K = Z_lm_r_K[[1]]$score,
Z_lm_intercept_AUC_K = Z_lm_AUC_K[[1]]$intercept,
Z_lm_slope_AUC_K = Z_lm_AUC_K[[1]]$slope,
Z_lm_R_squared_AUC_K = Z_lm_AUC_K[[1]]$r_squared,
Z_lm_Score_AUC_K = Z_lm_AUC_K[[1]]$score,
Z_lm_intercept_AUC_r = Z_lm_AUC_r[[1]]$intercept,
Z_lm_slope_AUC_r = Z_lm_AUC_r[[1]]$slope,
Z_lm_R_squared_AUC_r = Z_lm_AUC_r[[1]]$r_squared,
Z_lm_Score_AUC_r = Z_lm_AUC_r[[1]]$score
) %>%
select(
-lm_L, -lm_K, -lm_r, -lm_AUC,
-Z_lm_K_L, -Z_lm_r_L, -Z_lm_AUC_L, -Z_lm_r_K, -Z_lm_AUC_K, -Z_lm_AUC_r)
} # end deletion-specific block
# Create the final calculations and interactions dataframes with only required columns for csv output
# Create the final calculations and interactions dataframes with specific columns for csv output
# Trying to mimic original output data
df_calculations <- calculations %>%
select(
all_of(group_vars),
conc_num, conc_num_factor, conc_num_factor_factor, N,
mean_L, median_L, sd_L, se_L,
mean_K, median_K, sd_K, se_K,
mean_r, median_r, sd_r, se_r,
mean_AUC, median_AUC, sd_AUC, se_AUC,
Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC,
Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC,
WT_L, WT_K, WT_r, WT_AUC,
WT_sd_L, WT_sd_K, WT_sd_r, WT_sd_AUC,
Exp_L, Exp_K, Exp_r, Exp_AUC,
Delta_L, Delta_K, Delta_r, Delta_AUC,
mean_Delta_L, mean_Delta_K, mean_Delta_r, mean_Delta_AUC,
Zscore_L, Zscore_K, Zscore_r, Zscore_AUC,
NG_sum, DB_sum, SM_sum
) %>%
rename(NG = NG_sum, DB = DB_sum, SM = SM_sum)
select(all_of(c(
group_vars, # necessary for full_data left_join
"conc_num", "conc_num_factor", "conc_num_factor_factor", "N",
"mean_L", "median_L", "sd_L", "se_L",
"mean_K", "median_K", "sd_K", "se_K",
"mean_r", "median_r", "sd_r", "se_r",
"mean_AUC", "median_AUC", "sd_AUC", "se_AUC",
"Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
"Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
"WT_L", "WT_K", "WT_r", "WT_AUC",
"WT_sd_L", "WT_sd_K", "WT_sd_r", "WT_sd_AUC",
"Exp_L", "Exp_K", "Exp_r", "Exp_AUC",
"Delta_L", "Delta_K", "Delta_r", "Delta_AUC",
"mean_Delta_L", "mean_Delta_K", "mean_Delta_r", "mean_Delta_AUC",
"Zscore_L", "Zscore_K", "Zscore_r", "Zscore_AUC",
"NG_sum", "DB_sum", "SM_sum"
))) %>%
rename(NG = NG_sum, DB = DB_sum, SM = SM_sum)
df_interactions <- interactions %>%
select(
any_of(c(
group_vars,
"Avg_Zscore_L", "Avg_Zscore_K", "Avg_Zscore_r", "Avg_Zscore_AUC",
"Sum_Z_Score_L", "Sum_Z_Score_K", "Sum_Z_Score_r", "Sum_Z_Score_AUC",
"Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC",
"Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
"Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
"lm_Score_L", "lm_Score_K", "lm_Score_r", "lm_Score_AUC",
"R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC",
"NG_sum_int", "DB_sum_int", "SM_sum_int",
"Z_lm_intercept_L", "Z_lm_slope_L", "Z_lm_R_squared_L", "Z_lm_Score_L",
"Z_lm_intercept_K", "Z_lm_slope_K", "Z_lm_R_squared_K", "Z_lm_Score_K",
"Z_lm_intercept_r", "Z_lm_slope_r", "Z_lm_R_squared_r", "Z_lm_Score_r",
"Z_lm_intercept_AUC", "Z_lm_slope_AUC", "Z_lm_R_squared_AUC", "Z_lm_Score_AUC",
"Z_lm_intercept_K_L", "Z_lm_slope_K_L", "Z_lm_R_squared_K_L", "Z_lm_Score_K_L",
"Z_lm_intercept_r_L", "Z_lm_slope_r_L", "Z_lm_R_squared_r_L", "Z_lm_Score_r_L",
"Z_lm_intercept_AUC_L", "Z_lm_slope_AUC_L", "Z_lm_R_squared_AUC_L", "Z_lm_Score_AUC_L",
"Z_lm_intercept_r_K", "Z_lm_slope_r_K", "Z_lm_R_squared_r_K", "Z_lm_Score_r_K",
"Z_lm_intercept_AUC_K", "Z_lm_slope_AUC_K", "Z_lm_R_squared_AUC_K", "Z_lm_Score_AUC_K",
"Z_lm_intercept_AUC_r", "Z_lm_slope_AUC_r", "Z_lm_R_squared_AUC_r", "Z_lm_Score_AUC_r"
))
) %>%
rename(NG = NG_sum_int, DB = DB_sum_int, SM = SM_sum_int)
select(any_of(c(
group_vars, # necessary for full_data left_join
"Avg_Zscore_L", "Avg_Zscore_K", "Avg_Zscore_r", "Avg_Zscore_AUC",
"Sum_Z_Score_L", "Sum_Z_Score_K", "Sum_Z_Score_r", "Sum_Z_Score_AUC",
"Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC",
"Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
"Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
"lm_Score_L", "lm_Score_K", "lm_Score_r", "lm_Score_AUC",
"R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC",
"NG_sum_int", "DB_sum_int", "SM_sum_int",
"Rank_lm_R_squared_L", "Rank_lm_R_squared_K", "Rank_lm_R_squared_r", "Rank_lm_R_squared_AUC",
"Correlation_lm_intercept_L", "Correlation_lm_slope_L", "Correlation_lm_R_squared_L", "Correlation_lm_Score_L",
"Correlation_lm_intercept_K", "Correlation_lm_slope_K", "Correlation_lm_R_squared_K", "Correlation_lm_Score_K",
"Correlation_lm_intercept_r", "Correlation_lm_slope_r", "Correlation_lm_R_squared_r", "Correlation_lm_Score_r",
"Correlation_lm_intercept_AUC", "Correlation_lm_slope_AUC", "Correlation_lm_R_squared_AUC", "Correlation_lm_Score_AUC",
"Correlation_lm_intercept_K_L", "Correlation_lm_slope_K_L", "Correlation_lm_R_squared_K_L", "Correlation_lm_Score_K_L",
"Correlation_lm_intercept_r_L", "Correlation_lm_slope_r_L", "Correlation_lm_R_squared_r_L", "Correlation_lm_Score_r_L",
"Correlation_lm_intercept_AUC_L", "Correlation_lm_slope_AUC_L", "Correlation_lm_R_squared_AUC_L", "Correlation_lm_Score_AUC_L",
"Correlation_lm_intercept_r_K", "Correlation_lm_slope_r_K", "Correlation_lm_R_squared_r_K", "Correlation_lm_Score_r_K",
"Correlation_lm_intercept_AUC_K", "Correlation_lm_slope_AUC_K", "Correlation_lm_R_squared_AUC_K", "Correlation_lm_Score_AUC_K",
"Correlation_lm_intercept_AUC_r", "Correlation_lm_slope_AUC_r", "Correlation_lm_R_squared_AUC_r", "Correlation_lm_Score_AUC_r",
"Overlap"
))) %>%
rename(NG = NG_sum_int, DB = DB_sum_int, SM = SM_sum_int)
# Avoid column collision on left join for overlapping variables
calculations_no_overlap <- calculations %>%
@@ -1306,12 +1265,11 @@ generate_correlation_plot_configs <- function(df, df_reference) {
x_var <- paste0("Z_lm_", rel$x)
y_var <- paste0("Z_lm_", rel$y)
intercept <- df[[paste0("Z_lm_intercept_", rel$y, "_", rel$x)]][1]
slope <- df[[paste0("Z_lm_slope_", rel$y, "_", rel$x)]][1]
r_squared <- df[[paste0("Z_lm_R_squared_", rel$y, "_", rel$x)]][1]
intercept <- df[[paste0("Correlation_lm_intercept_", rel$y, "_", rel$x)]][1]
slope <- df[[paste0("Correlation_lm_slope_", rel$y, "_", rel$x)]][1]
r_squared <- df[[paste0("Correlation_lm_R_squared_", rel$y, "_", rel$x)]][1]
r_squared_rounded <- round(r_squared, 4)
r_squared_label <- paste("R-squared =", r_squared_rounded)
xmin <- min(c(min(df[[x_var]]), min(df_reference[[x_var]])))
xmax <- max(c(max(df[[x_var]]), max(df_reference[[x_var]])))