Specify output columns for interaction scores

This commit is contained in:
2024-09-10 15:40:25 -04:00
parent 324a4a15d5
commit ba8e4ced3e

View File

@@ -201,7 +201,7 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
AUC = df %>% filter(conc_num_factor == 0) %>% pull(sd_AUC) %>% first()
)
interaction_scores <- df %>%
calculations <- df %>%
mutate(
WT_L = df$mean_L,
WT_K = df$mean_K,
@@ -229,20 +229,20 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
) %>%
ungroup()
interaction_scores <- interaction_scores %>%
calculations <- calculations %>%
group_by(across(all_of(group_vars))) %>%
mutate(
Raw_Shift_L = mean_L[[1]] - bg_means$L,
Raw_Shift_K = mean_K[[1]] - bg_means$K,
Raw_Shift_r = mean_r[[1]] - bg_means$r,
Raw_Shift_AUC = mean_AUC[[1]] - bg_means$AUC,
Z_Shift_L = Raw_Shift_L[[1]] / df$sd_L[[1]],
Z_Shift_K = Raw_Shift_K[[1]] / df$sd_K[[1]],
Z_Shift_r = Raw_Shift_r[[1]] / df$sd_r[[1]],
Z_Shift_AUC = Raw_Shift_AUC[[1]] / df$sd_AUC[[1]]
Z_Shift_L = Raw_Shift_L[[1]] / bg_sd$L,
Z_Shift_K = Raw_Shift_K[[1]] / bg_sd$K,
Z_Shift_r = Raw_Shift_r[[1]] / bg_sd$r,
Z_Shift_AUC = Raw_Shift_AUC[[1]] / bg_sd$AUC
)
interaction_scores <- interaction_scores %>%
calculations <- calculations %>%
mutate(
Exp_L = WT_L + Raw_Shift_L,
Delta_L = mean_L - Exp_L,
@@ -254,7 +254,7 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
Delta_AUC = mean_AUC - Exp_AUC
)
interaction_scores <- interaction_scores %>%
calculations <- calculations %>%
mutate(
Delta_L = if_else(NG == 1, mean_L - WT_L, Delta_L),
Delta_K = if_else(NG == 1, mean_K - WT_K, Delta_K),
@@ -263,8 +263,7 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L)
)
# Calculate linear models and interaction scores
interaction_scores <- interaction_scores %>%
interactions <- calculations %>%
mutate(
lm_L = lm(Delta_L ~ conc_num_factor),
lm_K = lm(Delta_K ~ conc_num_factor),
@@ -276,20 +275,8 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
Zscore_AUC = Delta_AUC / WT_sd_AUC
)
interaction_scores <- interaction_scores %>%
interactions <- interactions %>%
mutate(
Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE),
Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE),
Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE),
Sum_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE)
)
interaction_scores_all <- interaction_scores %>%
mutate(
Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs,
Avg_Zscore_K = Sum_Zscore_K / num_non_removed_concs,
Avg_Zscore_r = Sum_Zscore_r / (total_conc_num - 1),
Avg_Zscore_AUC = Sum_Zscore_AUC / (total_conc_num - 1),
lm_Score_L = max_conc * coef(lm_L)[2] + coef(lm_L)[1],
lm_Score_K = max_conc * coef(lm_K)[2] + coef(lm_K)[1],
lm_Score_r = max_conc * coef(lm_r)[2] + coef(lm_r)[1],
@@ -297,25 +284,54 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
r_squared_L = summary(lm_L)$r.squared,
r_squared_K = summary(lm_K)$r.squared,
r_squared_r = summary(lm_r)$r.squared,
r_squared_AUC = summary(lm_AUC)$r.squared
r_squared_AUC = summary(lm_AUC)$r.squared,
Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE),
Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE),
Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE),
Sum_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE)
)
# Calculate Avg_Zscore and Z_lm for each variable
interaction_scores_all <- interaction_scores_all %>%
interactions <- interactions %>%
mutate(
Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs,
Avg_Zscore_K = Sum_Zscore_K / num_non_removed_concs,
Avg_Zscore_r = Sum_Zscore_r / (total_conc_num - 1),
Avg_Zscore_AUC = Sum_Zscore_AUC / (total_conc_num - 1),
Z_lm_L = (lm_Score_L - mean(lm_Score_L, na.rm = TRUE)) / sd(lm_Score_L, na.rm = TRUE),
Z_lm_K = (lm_Score_K - mean(lm_Score_K, na.rm = TRUE)) / sd(lm_Score_K, na.rm = TRUE),
Z_lm_r = (lm_Score_r - mean(lm_Score_r, na.rm = TRUE)) / sd(lm_Score_r, na.rm = TRUE),
Z_lm_AUC = (lm_Score_AUC - mean(lm_Score_AUC, na.rm = TRUE)) / sd(lm_Score_AUC, na.rm = TRUE)
)
calculations <- calculations %>%
select("OrfRep", "Gene", "num", "conc_num", "conc_num_factor",
"mean_L", "mean_K", "mean_r", "mean_AUC",
"median_L", "median_K", "median_r", "median_AUC",
"sd_L", "sd_K", "sd_r", "sd_AUC",
"se_L", "se_K", "se_r", "se_AUC",
"Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
"Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
"WT_L", "WT_K", "WT_r", "WT_AUC", "WT_sd_L", "WT_sd_K", "WT_sd_r", "WT_sd_AUC",
"Exp_L", "Exp_K", "Exp_r", "Exp_AUC", "Delta_L", "Delta_K", "Delta_r", "Delta_AUC",
"Zscore_L", "Zscore_K", "Zscore_r", "Zscore_AUC",
"NG", "SM", "DB") %>%
ungroup()
# Arrange results by descending lm_Score_L, then re-arrange by descending NG
interaction_scores_all <- interaction_scores_all %>%
interactions <- interactions %>%
select("OrfRep", "Gene", "num", "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_AUC", "Raw_Shift_r",
"Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
"lm_Score_L", "lm_Score_K", "lm_Score_AUC", "lm_Score_r",
"R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC",
"Sum_Z_Score_L", "Sum_Z_Score_K", "Sum_Z_Score_r", "Sum_Z_Score_AUC",
"Avg_Zscore_L", "Avg_Zscore_K", "Avg_Zscore_r", "Avg_Zscore_AUC",
"Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC",
"NG", "SM", "DB") %>%
arrange(desc(lm_Score_L)) %>%
arrange(desc(NG)) %>%
ungroup()
return(list(zscores_calculations = interaction_scores_all, zscores_interactions = interaction_scores))
return(list(calculations = calculations, interactions = interaction))
}
generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_layout = NULL) {
@@ -674,6 +690,7 @@ main <- function() {
# Print quality control graphs before removing data due to contamination and
# adjusting missing data to max theoretical values
message("Generating QC plot configurations")
l_vs_k_plots <- list(
list(df = df, x_var = "L", y_var = "K", plot_type = "scatter",
title = "Raw L vs K before QC",
@@ -864,10 +881,10 @@ main <- function() {
print(head(deletion_strains))
deletion_results <- calculate_interaction_scores(deletion_strains, max_conc, variables)
zscores_calculations_reference <- reference_results$zscores_calculations
zscores_interactions_reference <- reference_results$zscores_interactions
zscores_calculations <- deletion_results$zscores_calculations
zscores_interactions <- deletion_results$zscores_interactions
zscores_calculations_reference <- reference_results$calculations
zscores_interactions_reference <- reference_results$interactions
zscores_calculations <- deletion_results$calculations
zscores_interactions <- deletion_results$interactions
# Writing Z-Scores to file
write.csv(zscores_calculations_reference, file = file.path(out_dir, "RF_ZScores_Calculations.csv"), row.names = FALSE)
@@ -946,9 +963,20 @@ main <- function() {
generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots_lm",
plot_configs = rank_lm_plot_config, grid_layout = list(ncol = 3, nrow = 2))
interaction_scores_filtered
# lm_summaries <- lapply(lm_list, summary)
# correlation_plot_configs <- generate_correlation_plot_configs(zscores_interactions_filtered, lm_list, lm_summaries)
# generate_and_save_plots(zscores_interactions_filtered, output_dir, correlation_plot_configs)
})
})
}
main()
# # Correlation plots
# lm_list <- list(
# lm(Z_lm_K ~ Z_lm_L, data = zscores_interactions_filtered),
@@ -958,11 +986,3 @@ main <- function() {
# lm(Z_lm_AUC ~ Z_lm_K, data = zscores_interactions_filtered),
# lm(Z_lm_AUC ~ Z_lm_r, data = zscores_interactions_filtered)
# )
lm_summaries <- lapply(lm_list, summary)
correlation_plot_configs <- generate_correlation_plot_configs(zscores_interactions_filtered, lm_list, lm_summaries)
generate_and_save_plots(zscores_interactions_filtered, output_dir, correlation_plot_configs)
})
})
}
main()