|
@@ -163,41 +163,26 @@ update_gene_names <- function(df, sgd_gene_list) {
|
|
|
return(df)
|
|
|
}
|
|
|
|
|
|
-calculate_summary_stats <- function(df, variables, group_vars, no_sd = FALSE) {
|
|
|
+calculate_summary_stats <- function(df, variables, group_vars) {
|
|
|
+
|
|
|
summary_stats <- df %>%
|
|
|
group_by(across(all_of(group_vars))) %>%
|
|
|
summarise(
|
|
|
N = n(),
|
|
|
across(
|
|
|
- all_of(variables),
|
|
|
+ all_of(variables),
|
|
|
list(
|
|
|
mean = ~mean(., na.rm = TRUE),
|
|
|
median = ~median(., na.rm = TRUE),
|
|
|
- max = ~ ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)),
|
|
|
- min = ~ ifelse(all(is.na(.)), NA, min(., na.rm = TRUE))
|
|
|
+ max = ~ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)),
|
|
|
+ min = ~ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)),
|
|
|
+ sd = ~sd(., na.rm = TRUE),
|
|
|
+ se = ~sd(., na.rm = TRUE) / sqrt(N - 1) # TODO non-standard SE, needs explanation
|
|
|
),
|
|
|
.names = "{.fn}_{.col}"
|
|
|
),
|
|
|
.groups = "drop"
|
|
|
)
|
|
|
-
|
|
|
- if (!no_sd) {
|
|
|
- sd_se_stats <- df %>%
|
|
|
- group_by(across(all_of(group_vars))) %>%
|
|
|
- summarise(
|
|
|
- across(
|
|
|
- all_of(variables),
|
|
|
- list(
|
|
|
- sd = ~sd(., na.rm = TRUE),
|
|
|
- se = ~sd(., na.rm = TRUE) / sqrt(N - 1)
|
|
|
- ),
|
|
|
- .names = "{.fn}_{.col}"
|
|
|
- ),
|
|
|
- .groups = "drop"
|
|
|
- )
|
|
|
-
|
|
|
- summary_stats <- left_join(summary_stats, sd_se_stats, by = group_vars)
|
|
|
- }
|
|
|
|
|
|
# Create a cleaned version of df that doesn't overlap with summary_stats
|
|
|
cleaned_df <- df %>%
|
|
@@ -232,21 +217,18 @@ calculate_interaction_scores <- function(df, max_conc, variables = c("L", "K", "
|
|
|
calculations <- calculate_summary_stats(
|
|
|
df = df,
|
|
|
variables = variables,
|
|
|
- group_vars = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"),
|
|
|
- no_sd = TRUE
|
|
|
+ group_vars = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor")
|
|
|
)$df_with_stats
|
|
|
|
|
|
calculations <- calculations %>%
|
|
|
group_by(across(all_of(group_vars))) %>%
|
|
|
mutate(
|
|
|
- sd_L = df$sd_L,
|
|
|
- sd_K = df$sd_K,
|
|
|
- sd_r = df$sd_r,
|
|
|
- sd_AUC = df$sd_AUC,
|
|
|
- se_L = df$se_L,
|
|
|
- se_K = df$se_K,
|
|
|
- se_r = df$se_r,
|
|
|
- se_AUC = df$se_AUC,
|
|
|
+ NG = sum(NG, na.rm = TRUE),
|
|
|
+ DB = sum(DB, na.rm = TRUE),
|
|
|
+ SM = sum(SM, na.rm = TRUE),
|
|
|
+ num_non_removed_concs = total_conc_num - sum(DB, na.rm = TRUE) - 1,
|
|
|
+
|
|
|
+ # Store the background data
|
|
|
WT_L = bg_means$L,
|
|
|
WT_K = bg_means$K,
|
|
|
WT_r = bg_means$r,
|
|
@@ -275,70 +257,84 @@ calculate_interaction_scores <- function(df, max_conc, variables = c("L", "K", "
|
|
|
Delta_K = if_else(NG == 1, mean_K - WT_K, Delta_K),
|
|
|
Delta_r = if_else(NG == 1, mean_r - WT_r, Delta_r),
|
|
|
Delta_AUC = if_else(NG == 1, mean_AUC - WT_AUC, Delta_AUC),
|
|
|
- Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L)
|
|
|
- )
|
|
|
+ Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L),
|
|
|
|
|
|
+ # Calculate Z-scores
|
|
|
+ Zscore_L = Delta_L / WT_sd_L,
|
|
|
+ Zscore_K = Delta_K / WT_sd_K,
|
|
|
+ Zscore_r = Delta_r / WT_sd_r,
|
|
|
+ Zscore_AUC = Delta_AUC / WT_sd_AUC,
|
|
|
+
|
|
|
+ # Fit linear models and store in list-columns
|
|
|
+ gene_lm_L = list(lm(Delta_L ~ conc_num_factor, data = cur_data())),
|
|
|
+ gene_lm_K = list(lm(Delta_K ~ conc_num_factor, data = cur_data())),
|
|
|
+ gene_lm_r = list(lm(Delta_r ~ conc_num_factor, data = cur_data())),
|
|
|
+ gene_lm_AUC = list(lm(Delta_AUC ~ conc_num_factor, data = cur_data())),
|
|
|
+
|
|
|
+ # Extract coefficients using purrr::map_dbl
|
|
|
+ lm_intercept_L = map_dbl(gene_lm_L, ~ coef(.x)[1]),
|
|
|
+ lm_slope_L = map_dbl(gene_lm_L, ~ coef(.x)[2]),
|
|
|
+ lm_intercept_K = map_dbl(gene_lm_K, ~ coef(.x)[1]),
|
|
|
+ lm_slope_K = map_dbl(gene_lm_K, ~ coef(.x)[2]),
|
|
|
+ lm_intercept_r = map_dbl(gene_lm_r, ~ coef(.x)[1]),
|
|
|
+ lm_slope_r = map_dbl(gene_lm_r, ~ coef(.x)[2]),
|
|
|
+ lm_intercept_AUC = map_dbl(gene_lm_AUC, ~ coef(.x)[1]),
|
|
|
+ lm_slope_AUC = map_dbl(gene_lm_AUC, ~ coef(.x)[2]),
|
|
|
+
|
|
|
+ # Calculate lm_Score_* based on coefficients
|
|
|
+ lm_Score_L = max_conc * lm_slope_L + lm_intercept_L,
|
|
|
+ lm_Score_K = max_conc * lm_slope_K + lm_intercept_K,
|
|
|
+ lm_Score_r = max_conc * lm_slope_r + lm_intercept_r,
|
|
|
+ lm_Score_AUC = max_conc * lm_slope_AUC + lm_intercept_AUC,
|
|
|
+
|
|
|
+ # Calculate R-squared values
|
|
|
+ R_Squared_L = map_dbl(gene_lm_L, ~ summary(.x)$r.squared),
|
|
|
+ R_Squared_K = map_dbl(gene_lm_K, ~ summary(.x)$r.squared),
|
|
|
+ R_Squared_r = map_dbl(gene_lm_r, ~ summary(.x)$r.squared),
|
|
|
+ R_Squared_AUC = map_dbl(gene_lm_AUC, ~ summary(.x)$r.squared),
|
|
|
+
|
|
|
+ # Calculate Z_lm_* Scores
|
|
|
+ Z_lm_L = (lm_Score_L - mean(lm_Score_L, na.rm = TRUE)) / sd(lm_Score_L, na.rm = TRUE),
|
|
|
+ Z_lm_K = (lm_Score_K - mean(lm_Score_K, na.rm = TRUE)) / sd(lm_Score_K, na.rm = TRUE),
|
|
|
+ Z_lm_r = (lm_Score_r - mean(lm_Score_r, na.rm = TRUE)) / sd(lm_Score_r, na.rm = TRUE),
|
|
|
+ Z_lm_AUC = (lm_Score_AUC - mean(lm_Score_AUC, na.rm = TRUE)) / sd(lm_Score_AUC, na.rm = TRUE)
|
|
|
+ ) %>%
|
|
|
+ ungroup()
|
|
|
+
|
|
|
+ # Summarize some of the stats
|
|
|
interactions <- calculations %>%
|
|
|
group_by(across(all_of(group_vars))) %>%
|
|
|
mutate(
|
|
|
OrfRep = first(OrfRep),
|
|
|
Gene = first(Gene),
|
|
|
num = first(num),
|
|
|
- conc_num = first(conc_num),
|
|
|
- conc_num_factor = first(conc_num_factor),
|
|
|
- N = n(),
|
|
|
+
|
|
|
+ # Calculate raw shifts
|
|
|
Raw_Shift_L = first(Raw_Shift_L),
|
|
|
Raw_Shift_K = first(Raw_Shift_K),
|
|
|
Raw_Shift_r = first(Raw_Shift_r),
|
|
|
Raw_Shift_AUC = first(Raw_Shift_AUC),
|
|
|
+
|
|
|
+ # Calculate Z-shifts
|
|
|
Z_Shift_L = first(Z_Shift_L),
|
|
|
Z_Shift_K = first(Z_Shift_K),
|
|
|
Z_Shift_r = first(Z_Shift_r),
|
|
|
Z_Shift_AUC = first(Z_Shift_AUC),
|
|
|
- Zscore_L = Delta_L / WT_sd_L,
|
|
|
- Zscore_K = Delta_K / WT_sd_K,
|
|
|
- Zscore_r = Delta_r / WT_sd_r,
|
|
|
- Zscore_AUC = Delta_AUC / WT_sd_AUC,
|
|
|
+
|
|
|
+ # Sum Z-scores
|
|
|
Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE),
|
|
|
Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE),
|
|
|
Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE),
|
|
|
Sum_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE),
|
|
|
+
|
|
|
+ # Calculate Average Z-scores
|
|
|
Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs,
|
|
|
Avg_Zscore_K = Sum_Zscore_K / num_non_removed_concs,
|
|
|
Avg_Zscore_r = Sum_Zscore_r / (total_conc_num - 1),
|
|
|
- Avg_Zscore_AUC = Sum_Zscore_AUC / (total_conc_num - 1),
|
|
|
- gene_lm_L = lm(formula = Delta_L ~ conc_num_factor, data = df),
|
|
|
- gene_lm_K = lm(formula = Delta_K ~ conc_num_factor, data = df),
|
|
|
- gene_lm_r = lm(formula = Delta_r ~ conc_num_factor, data = df),
|
|
|
- gene_lm_AUC = lm(formula = Delta_AUC ~ conc_num_factor, data = df),
|
|
|
- lm_Score_L = max_conc * (gene_lm_L$coefficients[2]) + gene_lm_L$coefficients[1],
|
|
|
- lm_Score_K = max_conc * (gene_lm_K$coefficients[2]) + gene_lm_K$coefficients[1],
|
|
|
- lm_Score_r = max_conc * (gene_lm_r$coefficients[2]) + gene_lm_r$coefficients[1],
|
|
|
- lm_Score_AUC = max_conc * (gene_lm_AUC$coefficients[2]) + gene_lm_AUC$coefficients[1],
|
|
|
- R_Squared_L = summary(gene_lm_L)$r.squared,
|
|
|
- R_Squared_K = summary(gene_lm_K)$r.squared,
|
|
|
- R_Squared_r = summary(gene_lm_r)$r.squared,
|
|
|
- R_Squared_AUC = summary(gene_lm_AUC)$r.squared,
|
|
|
-
|
|
|
- lm_intercept_L = coef(lm(Zscore_L ~ conc_num_factor))[1],
|
|
|
- lm_slope_L = coef(lm(Zscore_L ~ conc_num))[2],
|
|
|
- lm_intercept_K = coef(lm(Zscore_K ~ conc_num))[1],
|
|
|
- lm_slope_K = coef(lm(Zscore_K ~ conc_num))[2],
|
|
|
- lm_intercept_r = coef(lm(Zscore_r ~ conc_num))[1],
|
|
|
- lm_slope_r = coef(lm(Zscore_r ~ conc_num))[2],
|
|
|
- lm_intercept_AUC = coef(lm(Zscore_AUC ~ conc_num))[1],
|
|
|
- lm_slope_AUC = coef(lm(Zscore_AUC ~ conc_num))[2],
|
|
|
-
|
|
|
- Z_lm_L = (lm_Score_L - mean_L) / sd(lm_Score_L, na.rm = TRUE),
|
|
|
- Z_lm_K = (lm_Score_K - mean_K) / sd(lm_Score_K, na.rm = TRUE),
|
|
|
- Z_lm_r = (lm_Score_r - mean_r) / sd(lm_Score_r, na.rm = TRUE),
|
|
|
- Z_lm_AUC = (lm_Score_AUC - mean_AUC) / sd(lm_Score_AUC, na.rm = TRUE),
|
|
|
- NG = sum(NG, na.rm = TRUE),
|
|
|
- DB = sum(DB, na.rm = TRUE),
|
|
|
- SM = sum(SM, na.rm = TRUE),
|
|
|
- num_non_removed_concs = total_conc_num - sum(DB, na.rm = TRUE) - 1
|
|
|
+ Avg_Zscore_AUC = Sum_Zscore_AUC / (total_conc_num - 1)
|
|
|
) %>%
|
|
|
- arrange(desc(Z_lm_L), desc(NG))
|
|
|
+ arrange(desc(Z_lm_L), desc(NG)) %>%
|
|
|
+ ungroup()
|
|
|
|
|
|
# Declare column order for output
|
|
|
calculations <- calculations %>%
|
|
@@ -356,17 +352,33 @@ calculate_interaction_scores <- function(df, max_conc, variables = c("L", "K", "
|
|
|
"Delta_L", "Delta_K", "Delta_r", "Delta_AUC",
|
|
|
"Zscore_L", "Zscore_K", "Zscore_r", "Zscore_AUC",
|
|
|
"NG", "SM", "DB")
|
|
|
+
|
|
|
+ interactions <- interactions %>%
|
|
|
+ select(
|
|
|
+ "OrfRep", "Gene", "num", "NG", "DB", "SM",
|
|
|
+ "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
|
|
|
+ "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
|
|
|
+ "Sum_Z_Score_L", "Sum_Z_Score_K", "Sum_Z_Score_r", "Sum_Z_Score_AUC",
|
|
|
+ "Avg_Zscore_L", "Avg_Zscore_K", "Avg_Zscore_r", "Avg_Zscore_AUC",
|
|
|
+ "lm_Score_L", "lm_Score_K", "lm_Score_r", "lm_Score_AUC",
|
|
|
+ "R_Squared_L", "R_Squared_K", "R_Squared_r", "R_Squared_AUC",
|
|
|
+ "Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC")
|
|
|
|
|
|
- cleaned_df <- df %>%
|
|
|
- select(-any_of(setdiff(intersect(names(df), names(calculations)), group_vars)))
|
|
|
- calculations_joined <- left_join(cleaned_df, calculations, by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))
|
|
|
+ # cleaned_df <- df %>%
|
|
|
+ # select(-any_of(setdiff(intersect(names(df), names(calculations)), group_vars)))
|
|
|
+
|
|
|
+ # calculations_joined <- left_join(cleaned_df, calculations, by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))
|
|
|
|
|
|
cleaned_df <- df %>%
|
|
|
select(-any_of(setdiff(intersect(names(df), names(interactions)), group_vars)))
|
|
|
+
|
|
|
interactions_joined <- left_join(cleaned_df, interactions, by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))
|
|
|
|
|
|
- return(list(calculations = calculations, interactions = interactions, interactions_joined = interactions_joined,
|
|
|
- calculations_joined = calculations_joined))
|
|
|
+ return(list(
|
|
|
+ calculations = calculations,
|
|
|
+ interactions = interactions,
|
|
|
+ interactions_joined = interactions_joined
|
|
|
+ ))
|
|
|
}
|
|
|
|
|
|
generate_and_save_plots <- function(out_dir, filename, plot_configs, grid_layout = NULL) {
|
|
@@ -380,18 +392,16 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, grid_layout
|
|
|
config <- plot_configs[[i]]
|
|
|
df <- config$df
|
|
|
|
|
|
- aes_mapping <- if (is.null(config$color_var)) {
|
|
|
- if (is.null(config$y_var)) {
|
|
|
+ aes_mapping <- if (config$plot_type == "bar" || config$plot_type == "density") {
|
|
|
+ if (is.null(config$color_var)) {
|
|
|
aes(x = .data[[config$x_var]])
|
|
|
} else {
|
|
|
- aes(x = .data[[config$x_var]], y = .data[[config$y_var]])
|
|
|
- }
|
|
|
- } else {
|
|
|
- if (is.null(config$y_var)) {
|
|
|
aes(x = .data[[config$x_var]], color = as.factor(.data[[config$color_var]]))
|
|
|
- } else {
|
|
|
- aes(x = .data[[config$x_var]], y = .data[[config$y_var]], color = as.factor(.data[[config$color_var]]))
|
|
|
}
|
|
|
+ } else if (is.null(config$color_var)) {
|
|
|
+ aes(x = .data[[config$x_var]], y = .data[[config$y_var]])
|
|
|
+ } else {
|
|
|
+ aes(x = .data[[config$x_var]], y = .data[[config$y_var]], color = as.factor(.data[[config$color_var]]))
|
|
|
}
|
|
|
|
|
|
# Start building the plot with aes_mapping
|
|
@@ -1331,14 +1341,14 @@ main <- function() {
|
|
|
reference_results <- calculate_interaction_scores(reference_strain, max_conc)
|
|
|
zscores_calculations_reference <- reference_results$calculations
|
|
|
zscores_interactions_reference <- reference_results$interactions
|
|
|
- zscores_calculations_reference_joined <- reference_results$calculations_joined
|
|
|
+ # zscores_calculations_reference_joined <- reference_results$calculations_joined
|
|
|
zscores_interactions_reference_joined <- reference_results$interactions_joined
|
|
|
|
|
|
message("Calculating deletion strain(s) interactions scores")
|
|
|
deletion_results <- calculate_interaction_scores(deletion_strains, max_conc)
|
|
|
zscores_calculations <- deletion_results$calculations
|
|
|
zscores_interactions <- deletion_results$interactions
|
|
|
- zscores_calculations_joined <- deletion_results$calculations_joined
|
|
|
+ # zscores_calculations_joined <- deletion_results$calculations_joined
|
|
|
zscores_interactions_joined <- deletion_results$interactions_joined
|
|
|
|
|
|
# Writing Z-Scores to file
|