From 7c0ed3eda13cedccdb383d0c089be9c877815ac9 Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Fri, 13 Sep 2024 18:28:09 -0400 Subject: [PATCH] Improve the interactions df --- .../apps/r/calculate_interaction_zscores.R | 94 +++++++++---------- qhtcp-workflow/qhtcp-workflow | 6 +- 2 files changed, 48 insertions(+), 52 deletions(-) diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 5aa61fcc..dbe5cded 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -187,7 +187,6 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c # Calculate total concentration variables total_conc_num <- length(unique(df$conc_num)) - num_non_removed_concs <- total_conc_num - sum(df$DB, na.rm = TRUE) - 1 # Pull the background means and standard deviations from zero concentration bg_means <- list( @@ -204,6 +203,7 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c AUC = df %>% filter(conc_num_factor == 0) %>% pull(sd_AUC) %>% first() ) + # Grab these values from the original df before mutating the new stats stats <- df %>% mutate( WT_L = mean_L, @@ -214,9 +214,11 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c WT_sd_K = sd_K, WT_sd_r = sd_r, WT_sd_AUC = sd_AUC - ) %>% + ) + + stats <- stats %>% group_by(OrfRep, Gene, num, conc_num, conc_num_factor) %>% - mutate( + summarise( N = sum(!is.na(L)), NG = sum(NG, na.rm = TRUE), DB = sum(DB, na.rm = TRUE), @@ -229,8 +231,7 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c sd = ~sd(., na.rm = TRUE), se = ~ifelse(sum(!is.na(.)) > 1, sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)) - 1), NA) ), .names = "{.fn}_{.col}") - ) %>% - ungroup() + ) stats <- stats %>% group_by(OrfRep, Gene, num) %>% @@ -274,51 +275,61 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c Zscore_AUC = Delta_AUC / WT_sd_AUC ) - stats <- stats %>% - mutate( + # Calculate linear models + lm_L <- lm(Delta_L ~ conc_num_factor, data = stats) + lm_K <- lm(Delta_K ~ conc_num_factor, data = stats) + lm_r <- lm(Delta_r ~ conc_num_factor, data = stats) + lm_AUC <- lm(Delta_AUC ~ conc_num_factor, data = stats) + + interactions <- stats %>% + transmute( + OrfRep = first(OrfRep), + Gene = first(Gene), + Raw_Shift_L = first(Raw_Shift_L), + Raw_Shift_K = first(Raw_Shift_K), + Raw_Shift_r = first(Raw_Shift_r), + Raw_Shift_AUC = first(Raw_Shift_AUC), + Z_Shift_L = first(Z_Shift_L), + Z_Shift_K = first(Z_Shift_K), + Z_Shift_r = first(Z_Shift_r), + Z_Shift_AUC = first(Z_Shift_AUC), Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE), Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE), Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE), - Sum_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE) + Sum_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE), + lm_Score_L = max_conc * coef(lm_L)[2] + coef(lm_L)[1], + lm_Score_K = max_conc * coef(lm_K)[2] + coef(lm_K)[1], + lm_Score_r = max_conc * coef(lm_r)[2] + coef(lm_r)[1], + lm_Score_AUC = max_conc * coef(lm_AUC)[2] + coef(lm_AUC)[1], + R_Squared_L = summary(lm_L)$r.squared, + R_Squared_K = summary(lm_K)$r.squared, + R_Squared_r = summary(lm_r)$r.squared, + R_Squared_AUC = summary(lm_AUC)$r.squared, + NG = sum(NG, na.rm = TRUE), + DB = sum(DB, na.rm = TRUE), + SM = sum(SM, na.rm = TRUE) ) - # Calculate linear models and store in own df for now - lms <- stats %>% - reframe( - L = lm(Delta_L ~ conc_num_factor), - K = lm(Delta_K ~ conc_num_factor), - r = lm(Delta_r ~ conc_num_factor), - AUC = lm(Delta_AUC ~ conc_num_factor) - ) - - stats <- stats %>% + num_non_removed_concs <- total_conc_num - sum(stats$DB, na.rm = TRUE) - 1 + + interactions <- interactions %>% mutate( Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs, Avg_Zscore_K = Sum_Zscore_K / num_non_removed_concs, Avg_Zscore_r = Sum_Zscore_r / (total_conc_num - 1), Avg_Zscore_AUC = Sum_Zscore_AUC / (total_conc_num - 1), - lm_Score_L = max_conc * coef(lms$L)[2] + coef(lms$L)[1], - lm_Score_K = max_conc * coef(lms$K)[2] + coef(lms$K)[1], - lm_Score_r = max_conc * coef(lms$r)[2] + coef(lms$r)[1], - lm_Score_AUC = max_conc * coef(lms$AUC)[2] + coef(lms$AUC)[1], - R_Squared_L = summary(lms$L)$r.squared, - R_Squared_K = summary(lms$K)$r.squared, - R_Squared_r = summary(lms$r)$r.squared, - R_Squared_AUC = summary(lms$AUC)$r.squared - ) - - stats <- stats %>% - mutate( Z_lm_L = (lm_Score_L - mean(lm_Score_L, na.rm = TRUE)) / sd(lm_Score_L, na.rm = TRUE), Z_lm_K = (lm_Score_K - mean(lm_Score_K, na.rm = TRUE)) / sd(lm_Score_K, na.rm = TRUE), Z_lm_r = (lm_Score_r - mean(lm_Score_r, na.rm = TRUE)) / sd(lm_Score_r, na.rm = TRUE), Z_lm_AUC = (lm_Score_AUC - mean(lm_Score_AUC, na.rm = TRUE)) / sd(lm_Score_AUC, na.rm = TRUE) - ) + ) %>% + arrange(desc(Z_lm_L)) %>% + arrange(desc(NG)) # Declare column order for output calculations <- stats %>% select( - "OrfRep", "Gene", "num", "conc_num", "conc_num_factor", + "OrfRep", "Gene", "conc_num", "conc_num_factor", "N", "mean_L", "mean_K", "mean_r", "mean_AUC", "median_L", "median_K", "median_r", "median_AUC", "sd_L", "sd_K", "sd_r", "sd_AUC", @@ -332,23 +343,8 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c "Zscore_L", "Zscore_K", "Zscore_r", "Zscore_AUC", "NG", "SM", "DB") - interactions <- stats %>% - select( - "OrfRep", "Gene", "num", "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_AUC", "Raw_Shift_r", - "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC", - "lm_Score_L", "lm_Score_K", "lm_Score_AUC", "lm_Score_r", - "R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC", - "Sum_Zscore_L", "Sum_Zscore_K", "Sum_Zscore_r", "Sum_Zscore_AUC", - "Avg_Zscore_L", "Avg_Zscore_K", "Avg_Zscore_r", "Avg_Zscore_AUC", - "Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC", - "NG", "SM", "DB") %>% - arrange(desc(lm_Score_L)) %>% - arrange(desc(NG)) - - print(df, n = 1) - print(calculations, n = 1) - df <- df %>% select(-any_of(setdiff(names(calculations), group_vars))) - df <- left_join(df, calculations, by = group_vars) + df <- df %>% select(-any_of(setdiff(names(calculations), OrfRep, Gene, num, conc_num, conc_num_factor))) + df <- left_join(df, calculations, by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor")) # df <- df %>% select(-any_of(setdiff(names(interactions), group_vars))) # df <- left_join(df, interactions, by = group_vars) diff --git a/qhtcp-workflow/qhtcp-workflow b/qhtcp-workflow/qhtcp-workflow index f753f921..cbd9163d 100755 --- a/qhtcp-workflow/qhtcp-workflow +++ b/qhtcp-workflow/qhtcp-workflow @@ -681,10 +681,10 @@ install_dependencies() { echo "If you do not have sudo access, you may want to use toolbox" case "$(uname -s)" in Linux*|CYGWIN*|MINGW*) - if hash dnf &>/dev/null; then + if command -v dnf &>/dev/null; then ask "Detected Linux RPM platform, continue?" || return 1 sudo dnf install "${depends_rpm[@]}" - elif hash apt &>/dev/null; then + elif command -v apt &>/dev/null; then ask "Detected Linux DEB platform, continue?" || return 1 sudo apt install "${depends_deb[@]}" else @@ -753,7 +753,7 @@ install_dependencies() { fi echo "" - hash "$MATLAB" &>/dev/null || echo "You will also need MATLAB installed for GUI modules" + command -v "$MATLAB" &>/dev/null || echo "You will also need MATLAB installed for GUI modules" }