Fix dimension mismatch when joining dataframes

This commit is contained in:
2024-10-02 12:08:44 -04:00
parent 9769bae3d1
commit 5a3fcec55f

View File

@@ -290,7 +290,7 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats, group_vars, ove
}) %>%
ungroup()
# Continue with the rest of the function as before
# Summary statistics for lm scores
lm_means_sds <- calculations %>%
group_by(across(all_of(group_vars))) %>%
summarise(
@@ -340,32 +340,51 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats, group_vars, ove
Z_Shift_L = first(Z_Shift_L),
Z_Shift_K = first(Z_Shift_K),
Z_Shift_r = first(Z_Shift_r),
Z_Shift_AUC = first(Z_Shift_AUC)
Z_Shift_AUC = first(Z_Shift_AUC),
) %>%
arrange(desc(Z_lm_L), desc(NG)) %>%
ungroup() %>%
mutate(
Overlap = case_when(
Z_lm_L >= overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Both",
Z_lm_L <= -overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Suppressor Both",
Z_lm_L >= overlap_threshold & Avg_Zscore_L < overlap_threshold ~ "Deletion Enhancer lm only",
Z_lm_L < overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Avg Zscore only",
Z_lm_L <= -overlap_threshold & Avg_Zscore_L > -overlap_threshold ~ "Deletion Suppressor lm only",
Z_lm_L > -overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Suppressor Avg Zscore only",
Z_lm_L >= overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Enhancer lm, Deletion Suppressor Avg Z score",
Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Z score",
TRUE ~ "No Effect"
)
# NG, DB, SM values
NG = first(NG),
DB = first(DB),
SM = first(SM)
)
# Return full data and correlation stats
# Creating the final calculations and interactions dataframes with only required columns for csv output
calculations_df <- calculations %>%
select(
all_of(group_vars),
conc_num, conc_num_factor, conc_num_factor_factor,
N, NG, DB, SM,
mean_L, median_L, sd_L, se_L,
mean_K, median_K, sd_K, se_K,
mean_r, median_r, sd_r, se_r,
mean_AUC, median_AUC, sd_AUC, se_AUC,
Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC,
Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC,
WT_L, WT_K, WT_r, WT_AUC,
WT_sd_L, WT_sd_K, WT_sd_r, WT_sd_AUC,
Exp_L, Exp_K, Exp_r, Exp_AUC,
Delta_L, Delta_K, Delta_r, Delta_AUC,
Zscore_L, Zscore_K, Zscore_r, Zscore_AUC
)
interactions_df <- interactions %>%
select(
all_of(group_vars),
NG, DB, SM,
Avg_Zscore_L, Avg_Zscore_K, Avg_Zscore_r, Avg_Zscore_AUC,
Z_lm_L, Z_lm_K, Z_lm_r, Z_lm_AUC,
Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC,
Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC
)
# Use left_join to avoid dimension mismatch issues
full_data <- calculations %>%
left_join(interactions, by = group_vars)
# Return full_data and the two required dataframes (calculations and interactions)
return(list(
calculations = calculations,
interactions = interactions,
calculations = calculations_df,
interactions = interactions_df,
full_data = full_data
))
}