diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 72268120..143d69e8 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -217,6 +217,20 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2) group_vars <- c("OrfRep", "Gene", "Drug") } + perform_lm_simple <- function(x, y, max_conc) { + if (all(is.na(x)) || all(is.na(y)) || length(x[!is.na(x)]) == 0 || length(y[!is.na(y)]) == 0) { + return(list(intercept = NA, slope = NA, r_squared = NA, score = NA)) + } else { + fit <- lm(y ~ x) + return(list( + intercept = coef(fit)[1], + slope = coef(fit)[2], + r_squared = summary(fit)$r.squared, + score = max_conc * coef(fit)[2] + coef(fit)[1] + )) + } + } + # Calculate WT statistics from df_bg wt_stats <- df_bg %>% group_by(across(all_of(bg_group_vars))) %>% @@ -301,78 +315,35 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2) calculations <- calculations %>% group_by(across(all_of(group_vars))) %>% mutate( - lm_L = map2(.x = Delta_L, .y = conc_num_factor, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) { - list(intercept = NA, slope = NA, r_squared = NA, score = NA) - } else { - lm(.x ~ .y) %>% { - list( - intercept = coef(.)[1], - slope = coef(.)[2], - r_squared = summary(.)$r.squared, - score = max_conc * coef(.)[2] + coef(.)[1] - ) - } - }), - lm_K = map2(.x = Delta_K, .y = conc_num_factor, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) { - list(intercept = NA, slope = NA, r_squared = NA, score = NA) - } else { - lm(.x ~ .y) %>% { - list( - intercept = coef(.)[1], - slope = coef(.)[2], - r_squared = summary(.)$r.squared, - score = max_conc * coef(.)[2] + coef(.)[1] - ) - } - }), - lm_r = map2(.x = Delta_r, .y = conc_num_factor, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) { - list(intercept = NA, slope = NA, r_squared = NA, score = NA) - } else { - lm(.x ~ .y) %>% { - list( - intercept = coef(.)[1], - slope = coef(.)[2], - r_squared = summary(.)$r.squared, - score = max_conc * coef(.)[2] + coef(.)[1] - ) - } - }), - lm_AUC = map2(.x = Delta_AUC, .y = conc_num_factor, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) { - list(intercept = NA, slope = NA, r_squared = NA, score = NA) - } else { - lm(.x ~ .y) %>% { - list( - intercept = coef(.)[1], - slope = coef(.)[2], - r_squared = summary(.)$r.squared, - score = max_conc * coef(.)[2] + coef(.)[1] - ) - } - }), + # Apply the simple LM function for each variable + lm_L = list(perform_lm_simple(Delta_L, conc_num_factor, max_conc)), + lm_K = list(perform_lm_simple(Delta_K, conc_num_factor, max_conc)), + lm_r = list(perform_lm_simple(Delta_r, conc_num_factor, max_conc)), + lm_AUC = list(perform_lm_simple(Delta_AUC, conc_num_factor, max_conc)), # Extract coefficients and statistics for each model - lm_intercept_L = map_dbl(lm_L, "intercept"), - lm_slope_L = map_dbl(lm_L, "slope"), - R_Squared_L = map_dbl(lm_L, "r_squared"), - lm_Score_L = map_dbl(lm_L, "score"), + lm_intercept_L = lm_L[[1]]$intercept, + lm_slope_L = lm_L[[1]]$slope, + R_Squared_L = lm_L[[1]]$r_squared, + lm_Score_L = lm_L[[1]]$score, - lm_intercept_K = map_dbl(lm_K, "intercept"), - lm_slope_K = map_dbl(lm_K, "slope"), - R_Squared_K = map_dbl(lm_K, "r_squared"), - lm_Score_K = map_dbl(lm_K, "score"), + lm_intercept_K = lm_K[[1]]$intercept, + lm_slope_K = lm_K[[1]]$slope, + R_Squared_K = lm_K[[1]]$r_squared, + lm_Score_K = lm_K[[1]]$score, - lm_intercept_r = map_dbl(lm_r, "intercept"), - lm_slope_r = map_dbl(lm_r, "slope"), - R_Squared_r = map_dbl(lm_r, "r_squared"), - lm_Score_r = map_dbl(lm_r, "score"), + lm_intercept_r = lm_r[[1]]$intercept, + lm_slope_r = lm_r[[1]]$slope, + R_Squared_r = lm_r[[1]]$r_squared, + lm_Score_r = lm_r[[1]]$score, - lm_intercept_AUC = map_dbl(lm_AUC, "intercept"), - lm_slope_AUC = map_dbl(lm_AUC, "slope"), - R_Squared_AUC = map_dbl(lm_AUC, "r_squared"), - lm_Score_AUC = map_dbl(lm_AUC, "score") - ) %>% - select(-lm_L, -lm_K, -lm_r, -lm_AUC) %>% - ungroup() + lm_intercept_AUC = lm_AUC[[1]]$intercept, + lm_slope_AUC = lm_AUC[[1]]$slope, + R_Squared_AUC = lm_AUC[[1]]$r_squared, + lm_Score_AUC = lm_AUC[[1]]$score + ) %>% + select(-lm_L, -lm_K, -lm_r, -lm_AUC) %>% + ungroup() # For interaction plot error bars delta_means_sds <- calculations %>% @@ -509,179 +480,70 @@ calculate_interaction_scores <- function(df, df_bg, type, overlap_threshold = 2) Z_lm_L <= -overlap_threshold & Avg_Zscore_L >= overlap_threshold ~ "Deletion Suppressor lm, Deletion Enhancer Avg Zscore", TRUE ~ "No Effect" ), - lm_L = map2(.x = Z_lm_L, .y = Avg_Zscore_L, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) { - list(intercept = NA, slope = NA, r_squared = NA, score = NA) - } else { - lm(.y ~ .x) %>% { - list( - intercept = coef(.)[1], - slope = coef(.)[2], - r_squared = summary(.)$r.squared, - score = max_conc * coef(.)[2] + coef(.)[1] - ) - } - }), - lm_K = map2(.x = Z_lm_K, .y = Avg_Zscore_K, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) { - list(intercept = NA, slope = NA, r_squared = NA, score = NA) - } else { - lm(.y ~ .x) %>% { - list( - intercept = coef(.)[1], - slope = coef(.)[2], - r_squared = summary(.)$r.squared, - score = max_conc * coef(.)[2] + coef(.)[1] - ) - } - }), - lm_r = map2(.x = Z_lm_r, .y = Avg_Zscore_r, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) { - list(intercept = NA, slope = NA, r_squared = NA, score = NA) - } else { - lm(.y ~ .x) %>% { - list( - intercept = coef(.)[1], - slope = coef(.)[2], - r_squared = summary(.)$r.squared, - score = max_conc * coef(.)[2] + coef(.)[1] - ) - } - }), - lm_AUC = map2(.x = Z_lm_AUC, .y = Avg_Zscore_AUC, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) { - list(intercept = NA, slope = NA, r_squared = NA, score = NA) - } else { - lm(.y ~ .x) %>% { - list( - intercept = coef(.)[1], - slope = coef(.)[2], - r_squared = summary(.)$r.squared, - score = max_conc * coef(.)[2] + coef(.)[1] - ) - } - }), + # Apply the perform_lm_simple function for each variable pair + lm_L = list(perform_lm_simple(Z_lm_L, Avg_Zscore_L, max_conc)), + lm_K = list(perform_lm_simple(Z_lm_K, Avg_Zscore_K, max_conc)), + lm_r = list(perform_lm_simple(Z_lm_r, Avg_Zscore_r, max_conc)), + lm_AUC = list(perform_lm_simple(Z_lm_AUC, Avg_Zscore_AUC, max_conc)), - # For correlation plots - Z_lm_K_L = map2(.x = Z_lm_K, .y = Z_lm_L, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) { - list(intercept = NA, slope = NA, r_squared = NA, score = NA) - } else { - lm(.y ~ .x) %>% { - list( - intercept = coef(.)[1], - slope = coef(.)[2], - r_squared = summary(.)$r.squared, - score = max_conc * coef(.)[2] + coef(.)[1] - ) - } - }), - Z_lm_r_L = map2(.x = Z_lm_r, .y = Z_lm_L, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) { - list(intercept = NA, slope = NA, r_squared = NA, score = NA) - } else { - lm(.y ~ .x) %>% { - list( - intercept = coef(.)[1], - slope = coef(.)[2], - r_squared = summary(.)$r.squared, - score = max_conc * coef(.)[2] + coef(.)[1] - ) - } - }), - Z_lm_R_AUC_L = map2(.x = Z_lm_AUC, .y = Z_lm_L, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) { - list(intercept = NA, slope = NA, r_squared = NA, score = NA) - } else { - lm(.y ~ .x) %>% { - list( - intercept = coef(.)[1], - slope = coef(.)[2], - r_squared = summary(.)$r.squared, - score = max_conc * coef(.)[2] + coef(.)[1] - ) - } - }), - Z_lm_R_r_K = map2(.x = Z_lm_r, .y = Z_lm_K, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) { - list(intercept = NA, slope = NA, r_squared = NA, score = NA) - } else { - lm(.y ~ .x) %>% { - list( - intercept = coef(.)[1], - slope = coef(.)[2], - r_squared = summary(.)$r.squared, - score = max_conc * coef(.)[2] + coef(.)[1] - ) - } - }), - Z_lm_R_AUC_K = map2(.x = Z_lm_AUC, .y = Z_lm_K, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) { - list(intercept = NA, slope = NA, r_squared = NA, score = NA) - } else { - lm(.y ~ .x) %>% { - list( - intercept = coef(.)[1], - slope = coef(.)[2], - r_squared = summary(.)$r.squared, - score = max_conc * coef(.)[2] + coef(.)[1] - ) - } - }), - Z_lm_R_AUC_r = map2(.x = Z_lm_AUC, .y = Z_lm_r, ~ if (all(is.na(.x)) || all(is.na(.y)) || length(.x[!is.na(.x)]) == 0) { - list(intercept = NA, slope = NA, r_squared = NA, score = NA) - } else { - lm(.y ~ .x) %>% { - list( - intercept = coef(.)[1], - slope = coef(.)[2], - r_squared = summary(.)$r.squared, - score = max_conc * coef(.)[2] + coef(.)[1] - ) - } - }), + # Correlation models for various pairs + Z_lm_K_L = list(perform_lm_simple(Z_lm_K, Z_lm_L, max_conc)), + Z_lm_r_L = list(perform_lm_simple(Z_lm_r, Z_lm_L, max_conc)), + Z_lm_R_AUC_L = list(perform_lm_simple(Z_lm_AUC, Z_lm_L, max_conc)), + Z_lm_R_r_K = list(perform_lm_simple(Z_lm_r, Z_lm_K, max_conc)), + Z_lm_R_AUC_K = list(perform_lm_simple(Z_lm_AUC, Z_lm_K, max_conc)), + Z_lm_R_AUC_r = list(perform_lm_simple(Z_lm_AUC, Z_lm_r, max_conc)), # Extract coefficients and statistics for each model - lm_rank_intercept_L = map_dbl(lm_L, "intercept"), - lm_rank_slope_L = map_dbl(lm_L, "slope"), - R_Squared_L = map_dbl(lm_L, "r_squared"), - lm_Score_L = map_dbl(lm_L, "score"), + lm_rank_intercept_L = lm_L[[1]]$intercept, + lm_rank_slope_L = lm_L[[1]]$slope, + R_Squared_L = lm_L[[1]]$r_squared, + lm_Score_L = lm_L[[1]]$score, - lm_intercept_K = map_dbl(lm_K, "intercept"), - lm_slope_K = map_dbl(lm_K, "slope"), - R_Squared_K = map_dbl(lm_K, "r_squared"), - lm_Score_K = map_dbl(lm_K, "score"), + lm_intercept_K = lm_K[[1]]$intercept, + lm_slope_K = lm_K[[1]]$slope, + R_Squared_K = lm_K[[1]]$r_squared, + lm_Score_K = lm_K[[1]]$score, - lm_intercept_r = map_dbl(lm_r, "intercept"), - lm_slope_r = map_dbl(lm_r, "slope"), - R_Squared_r = map_dbl(lm_r, "r_squared"), - lm_Score_r = map_dbl(lm_r, "score"), + lm_intercept_r = lm_r[[1]]$intercept, + lm_slope_r = lm_r[[1]]$slope, + R_Squared_r = lm_r[[1]]$r_squared, + lm_Score_r = lm_r[[1]]$score, - lm_intercept_AUC = map_dbl(lm_AUC, "intercept"), - lm_slope_AUC = map_dbl(lm_AUC, "slope"), - R_Squared_AUC = map_dbl(lm_AUC, "r_squared"), - lm_Score_AUC = map_dbl(lm_AUC, "score"), + lm_intercept_AUC = lm_AUC[[1]]$intercept, + lm_slope_AUC = lm_AUC[[1]]$slope, + R_Squared_AUC = lm_AUC[[1]]$r_squared, + lm_Score_AUC = lm_AUC[[1]]$score, - Z_lm_intercept_K_L = map_dbl(Z_lm_K_L, "intercept"), - Z_lm_slope_K_L = map_dbl(Z_lm_K_L, "slope"), - Z_lm_R_squared_K_L = map_dbl(Z_lm_K_L, "r_squared"), - Z_lm_Score_K_L = map_dbl(Z_lm_K_L, "score"), + Z_lm_intercept_K_L = Z_lm_K_L[[1]]$intercept, + Z_lm_slope_K_L = Z_lm_K_L[[1]]$slope, + Z_lm_R_squared_K_L = Z_lm_K_L[[1]]$r_squared, + Z_lm_Score_K_L = Z_lm_K_L[[1]]$score, - Z_lm_intercept_r_L = map_dbl(Z_lm_r_L, "intercept"), - Z_lm_slope_r_L = map_dbl(Z_lm_r_L, "slope"), - Z_lm_R_squared_r_L = map_dbl(Z_lm_r_L, "r_squared"), - Z_lm_Score_r_L = map_dbl(Z_lm_r_L, "score"), + Z_lm_intercept_r_L = Z_lm_r_L[[1]]$intercept, + Z_lm_slope_r_L = Z_lm_r_L[[1]]$slope, + Z_lm_R_squared_r_L = Z_lm_r_L[[1]]$r_squared, + Z_lm_Score_r_L = Z_lm_r_L[[1]]$score, - Z_lm_intercept_R_AUC_L = map_dbl(Z_lm_R_AUC_L, "intercept"), - Z_lm_slope_R_AUC_L = map_dbl(Z_lm_R_AUC_L, "slope"), - Z_lm_R_squared_R_AUC_L = map_dbl(Z_lm_R_AUC_L, "r_squared"), - Z_lm_Score_R_AUC_L = map_dbl(Z_lm_R_AUC_L, "score"), + Z_lm_intercept_R_AUC_L = Z_lm_R_AUC_L[[1]]$intercept, + Z_lm_slope_R_AUC_L = Z_lm_R_AUC_L[[1]]$slope, + Z_lm_R_squared_R_AUC_L = Z_lm_R_AUC_L[[1]]$r_squared, + Z_lm_Score_R_AUC_L = Z_lm_R_AUC_L[[1]]$score, - Z_lm_intercept_R_r_K = map_dbl(Z_lm_R_r_K, "intercept"), - Z_lm_slope_R_r_K = map_dbl(Z_lm_R_r_K, "slope"), - Z_lm_R_squared_R_r_K = map_dbl(Z_lm_R_r_K, "r_squared"), - Z_lm_Score_R_r_K = map_dbl(Z_lm_R_r_K, "score"), + Z_lm_intercept_R_r_K = Z_lm_R_r_K[[1]]$intercept, + Z_lm_slope_R_r_K = Z_lm_R_r_K[[1]]$slope, + Z_lm_R_squared_R_r_K = Z_lm_R_r_K[[1]]$r_squared, + Z_lm_Score_R_r_K = Z_lm_R_r_K[[1]]$score, - Z_lm_intercept_R_AUC_K = map_dbl(Z_lm_R_AUC_K, "intercept"), - Z_lm_slope_R_AUC_K = map_dbl(Z_lm_R_AUC_K, "slope"), - Z_lm_R_squared_R_AUC_K = map_dbl(Z_lm_R_AUC_K, "r_squared"), - Z_lm_Score_R_AUC_K = map_dbl(Z_lm_R_AUC_K, "score"), + Z_lm_intercept_R_AUC_K = Z_lm_R_AUC_K[[1]]$intercept, + Z_lm_slope_R_AUC_K = Z_lm_R_AUC_K[[1]]$slope, + Z_lm_R_squared_R_AUC_K = Z_lm_R_AUC_K[[1]]$r_squared, + Z_lm_Score_R_AUC_K = Z_lm_R_AUC_K[[1]]$score, - Z_lm_intercept_R_AUC_r = map_dbl(Z_lm_R_AUC_r, "intercept"), - Z_lm_slope_R_AUC_r = map_dbl(Z_lm_R_AUC_r, "slope"), - Z_lm_R_squared_R_AUC_r = map_dbl(Z_lm_R_AUC_r, "r_squared"), - Z_lm_Score_R_AUC_r = map_dbl(Z_lm_R_AUC_r, "score") + Z_lm_intercept_R_AUC_r = Z_lm_R_AUC_r[[1]]$intercept, + Z_lm_slope_R_AUC_r = Z_lm_R_AUC_r[[1]]$slope, + Z_lm_R_squared_R_AUC_r = Z_lm_R_AUC_r[[1]]$r_squared, + Z_lm_Score_R_AUC_r = Z_lm_R_AUC_r[[1]]$score ) %>% select( -lm_L, -lm_K, -lm_r, -lm_AUC, -Z_lm_K_L, -Z_lm_r_L, -Z_lm_R_AUC_L,