Fix dimension mismatch when joining the calculations and interactions dataframes

This commit is contained in:
2024-10-02 12:08:44 -04:00
parent 9769bae3d1
commit 5a3fcec55f

View File

@@ -212,7 +212,7 @@ calculate_summary_stats <- function(df, variables, group_vars) {
} }
calculate_interaction_scores <- function(df, max_conc, bg_stats, group_vars, overlap_threshold = 2) { calculate_interaction_scores <- function(df, max_conc, bg_stats, group_vars, overlap_threshold = 2) {
# Calculate total concentration variables # Calculate total concentration variables
total_conc_num <- length(unique(df$conc_num)) total_conc_num <- length(unique(df$conc_num))
@@ -290,7 +290,7 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats, group_vars, ove
}) %>% }) %>%
ungroup() ungroup()
# Continue with the rest of the function as before # Summary statistics for lm scores
lm_means_sds <- calculations %>% lm_means_sds <- calculations %>%
group_by(across(all_of(group_vars))) %>% group_by(across(all_of(group_vars))) %>%
summarise( summarise(
@@ -314,7 +314,7 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats, group_vars, ove
Z_lm_r = (lm_Score_r - mean_lm_r) / sd_lm_r, Z_lm_r = (lm_Score_r - mean_lm_r) / sd_lm_r,
Z_lm_AUC = (lm_Score_AUC - mean_lm_AUC) / sd_lm_AUC Z_lm_AUC = (lm_Score_AUC - mean_lm_AUC) / sd_lm_AUC
) )
# Build summary stats (interactions) # Build summary stats (interactions)
interactions <- calculations %>% interactions <- calculations %>%
group_by(across(all_of(group_vars))) %>% group_by(across(all_of(group_vars))) %>%
@@ -340,32 +340,51 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats, group_vars, ove
Z_Shift_L = first(Z_Shift_L), Z_Shift_L = first(Z_Shift_L),
Z_Shift_K = first(Z_Shift_K), Z_Shift_K = first(Z_Shift_K),
Z_Shift_r = first(Z_Shift_r), Z_Shift_r = first(Z_Shift_r),
Z_Shift_AUC = first(Z_Shift_AUC) Z_Shift_AUC = first(Z_Shift_AUC),
) %>% # NG, DB, SM values
arrange(desc(Z_lm_L), desc(NG)) %>% NG = first(NG),
ungroup() %>% DB = first(DB),
mutate( SM = first(SM)
Overlap = case_when(
Z_lm_L >= overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Both",
Z_lm_L <= -overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Suppressor Both",
Z_lm_L >= overlap_threshold & Avg_Zscore_L < overlap_threshold ~ "Deletion Enhancer lm only",
Z_lm_L < overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Avg Zscore only",
Z_lm_L <= -overlap_threshold & Avg_Zscore_L > -overlap_threshold ~ "Deletion Suppressor lm only",
Z_lm_L > -overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Suppressor Avg Zscore only",
Z_lm_L >= overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Enhancer lm, Deletion Suppressor Avg Z score",
Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Z score",
TRUE ~ "No Effect"
)
) )
# Return full data and correlation stats # Creating the final calculations and interactions dataframes with only required columns for csv output
calculations_df <- calculations %>%
select(
all_of(group_vars),
conc_num, conc_num_factor, conc_num_factor_factor,
N, NG, DB, SM,
mean_L, median_L, sd_L, se_L,
mean_K, median_K, sd_K, se_K,
mean_r, median_r, sd_r, se_r,
mean_AUC, median_AUC, sd_AUC, se_AUC,
Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC,
Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC,
WT_L, WT_K, WT_r, WT_AUC,
WT_sd_L, WT_sd_K, WT_sd_r, WT_sd_AUC,
Exp_L, Exp_K, Exp_r, Exp_AUC,
Delta_L, Delta_K, Delta_r, Delta_AUC,
Zscore_L, Zscore_K, Zscore_r, Zscore_AUC
)
interactions_df <- interactions %>%
select(
all_of(group_vars),
NG, DB, SM,
Avg_Zscore_L, Avg_Zscore_K, Avg_Zscore_r, Avg_Zscore_AUC,
Z_lm_L, Z_lm_K, Z_lm_r, Z_lm_AUC,
Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC,
Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC
)
# Use left_join to avoid dimension mismatch issues
full_data <- calculations %>% full_data <- calculations %>%
left_join(interactions, by = group_vars) left_join(interactions, by = group_vars)
# Return full_data and the two required dataframes (calculations and interactions)
return(list( return(list(
calculations = calculations, calculations = calculations_df,
interactions = interactions, interactions = interactions_df,
full_data = full_data full_data = full_data
)) ))
} }