From 5a3fcec55f5495f987af367b8a4bb06d22071409 Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Wed, 2 Oct 2024 12:08:44 -0400 Subject: [PATCH] Fix dataframe joining dimensionality --- .../apps/r/calculate_interaction_zscores.R | 65 ++++++++++++------- 1 file changed, 42 insertions(+), 23 deletions(-) diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 9cabf0db..5d742082 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -212,7 +212,7 @@ calculate_summary_stats <- function(df, variables, group_vars) { } calculate_interaction_scores <- function(df, max_conc, bg_stats, group_vars, overlap_threshold = 2) { - + # Calculate total concentration variables total_conc_num <- length(unique(df$conc_num)) @@ -290,7 +290,7 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats, group_vars, ove }) %>% ungroup() - # Continue with the rest of the function as before + # Summary statistics for lm scores lm_means_sds <- calculations %>% group_by(across(all_of(group_vars))) %>% summarise( @@ -314,7 +314,7 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats, group_vars, ove Z_lm_r = (lm_Score_r - mean_lm_r) / sd_lm_r, Z_lm_AUC = (lm_Score_AUC - mean_lm_AUC) / sd_lm_AUC ) - + # Build summary stats (interactions) interactions <- calculations %>% group_by(across(all_of(group_vars))) %>% @@ -340,32 +340,51 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats, group_vars, ove Z_Shift_L = first(Z_Shift_L), Z_Shift_K = first(Z_Shift_K), Z_Shift_r = first(Z_Shift_r), - Z_Shift_AUC = first(Z_Shift_AUC) + Z_Shift_AUC = first(Z_Shift_AUC), - ) %>% - arrange(desc(Z_lm_L), desc(NG)) %>% - ungroup() %>% - mutate( - Overlap = case_when( - Z_lm_L >= overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Both", - Z_lm_L <= -overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Suppressor Both", - Z_lm_L >= overlap_threshold & Avg_Zscore_L < overlap_threshold ~ "Deletion Enhancer lm only", - Z_lm_L < overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Enhancer Avg Zscore only", - Z_lm_L <= -overlap_threshold & Avg_Zscore_L > -overlap_threshold ~ "Deletion Suppressor lm only", - Z_lm_L > -overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Suppressor Avg Zscore only", - Z_lm_L >= overlap_threshold & Avg_Zscore_L <= -overlap_threshold ~ "Deletion Enhancer lm, Deletion Suppressor Avg Z score", - Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Z score", - TRUE ~ "No Effect" - ) + # NG, DB, SM values + NG = first(NG), + DB = first(DB), + SM = first(SM) ) - # Return full data and correlation stats + # Creating the final calculations and interactions dataframes with only required columns for csv output + calculations_df <- calculations %>% + select( + all_of(group_vars), + conc_num, conc_num_factor, conc_num_factor_factor, + N, NG, DB, SM, + mean_L, median_L, sd_L, se_L, + mean_K, median_K, sd_K, se_K, + mean_r, median_r, sd_r, se_r, + mean_AUC, median_AUC, sd_AUC, se_AUC, + Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC, + Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC, + WT_L, WT_K, WT_r, WT_AUC, + WT_sd_L, WT_sd_K, WT_sd_r, WT_sd_AUC, + Exp_L, Exp_K, Exp_r, Exp_AUC, + Delta_L, Delta_K, Delta_r, Delta_AUC, + Zscore_L, Zscore_K, Zscore_r, Zscore_AUC + ) + + interactions_df <- interactions %>% + select( + all_of(group_vars), + NG, DB, SM, + Avg_Zscore_L, Avg_Zscore_K, Avg_Zscore_r, Avg_Zscore_AUC, + Z_lm_L, Z_lm_K, Z_lm_r, Z_lm_AUC, + Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC, + Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC + ) + + # Use left_join to avoid dimension mismatch issues full_data <- calculations %>% left_join(interactions, by = group_vars) - + + # Return full_data and the two required dataframes (calculations and interactions) return(list( - calculations = calculations, - interactions = interactions, + calculations = calculations_df, + interactions = interactions_df, full_data = full_data )) }