Auto-commit: apps/r/calculate_interaction_zscores5.R

This commit is contained in:
2024-08-31 12:32:05 -04:00
parent d460f1e51d
commit 7b0b2c9bbd

View File

@@ -289,9 +289,30 @@ process_strains <- function(df, df_stats_by_l_within_2sd_k, strain, output_dir)
return(df_strains)
}
calculate_interaction_scores <- function(interaction_scores, df, df_stats_by_l, df_stats_by_k,
df_stats_by_r, df_stats_by_auc, background_means,
max_conc, variables, out_dir, is_reference = TRUE) {
calculate_interaction_scores <- function(df, df_stats_by_l, df_stats_by_k,
df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, out_dir) {
# Initialize the dataframe
interaction_scores <- df %>%
distinct(OrfRep) %>%
mutate(
Gene = NA,
raw_shift_l = NA, z_shift_l = NA, lm_score_l = NA, z_lm_l = NA,
r_squared_l = NA, sum_z_score_l = NA, avg_zscore_l = NA,
raw_shift_k = NA, z_shift_k = NA, lm_score_k = NA, z_lm_k = NA,
r_squared_k = NA, sum_z_score_k = NA, avg_zscore_k = NA,
raw_shift_r = NA, z_shift_r = NA, lm_score_r = NA, z_lm_r = NA,
r_squared_r = NA, sum_z_score_r = NA, avg_zscore_r = NA,
raw_shift_auc = NA, z_shift_auc = NA, lm_score_auc = NA, z_lm_auc = NA,
r_squared_auc = NA, sum_z_score_auc = NA, avg_zscore_auc = NA,
NG = NA, DB = NA, SM = NA
)
# The list to hold all of the combined interactions
interaction_scores_list <- list()
# TODO is this necessary?
# Initialize the dataframe
interaction_scores <- interaction_scores %>%
mutate(across(.cols = everything(), ~ NA)) # Initialize columns with NA
@@ -385,67 +406,65 @@ calculate_interaction_scores <- function(interaction_scores, df, df_stats_by_l,
SM = sum(df_stats_interaction$SM, na.rm = TRUE)
)
# If deletion strain, append to the final interaction statistics
if (!is_reference) {
if (i == 1) {
df_stats_interaction_all <- df_stats_interaction
} else {
df_stats_interaction_all <- bind_rows(df_stats_interaction_all, df_stats_interaction)
}
}
interaction_scores_list[[i]] <- interaction_scores
}
# If deletion strain, return the final interaction statistics along with the scores
if (!is_reference) {
return(list(interaction_scores = interaction_scores, df_stats_interaction_all = df_stats_interaction_all))
}
return(interaction_scores)
return(interaction_scores_list)
}
#' Sort the interaction scores and write one CSV file per score cut.
#'
#' The full table is written first, followed by subsets selected with a
#' +/-2 cutoff on `Avg_Zscore_L`, `Avg_Zscore_K`, `Z_lm_L` and `Z_lm_K`.
#' Every file is named `<prefix><suffix>` and written into `out_dir`.
#'
#' @param interaction_scores Data frame holding at least the columns
#'   `Z_lm_L`, `Z_lm_K`, `Avg_Zscore_L`, `Avg_Zscore_K` and `NG`.
#' @param out_dir Directory the CSV files are written to.
#' @param prefix String prepended to every output file name.
#' @return Called for its side effect of writing the CSV files.
filter_and_save_interaction_scores <- function(interaction_scores, out_dir, prefix) {
  # Single sort: Z_lm_L descending, ties broken by NG descending.
  # (Two chained arrange() calls would let the second sort key override
  # the first, so the keys are combined into one call.)
  interaction_scores <- interaction_scores %>%
    arrange(desc(Z_lm_L), desc(NG))

  # File-name suffixes are plain strings stored next to their data subset;
  # an expression such as paste0(...) cannot be used as an argument name
  # inside list(), so the prefix is applied when the file is written.
  # Note that the K cuts use the opposite sign to the L cuts.
  filters <- list(
    list(name = "_ZScores_Interaction.csv",
      filter = interaction_scores),
    list(name = "_ZScores_Interaction_DeletionEnhancers_L.csv",
      filter = filter(interaction_scores, Avg_Zscore_L >= 2)),
    list(name = "_ZScores_Interaction_DeletionEnhancers_K.csv",
      filter = filter(interaction_scores, Avg_Zscore_K <= -2)),
    list(name = "_ZScores_Interaction_DeletionSuppressors_L.csv",
      filter = filter(interaction_scores, Avg_Zscore_L <= -2)),
    list(name = "_ZScores_Interaction_DeletionSuppressors_K.csv",
      filter = filter(interaction_scores, Avg_Zscore_K >= 2)),
    list(name = "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv",
      filter = filter(interaction_scores, Avg_Zscore_L >= 2 | Avg_Zscore_L <= -2)),
    list(name = "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv",
      filter = filter(interaction_scores, Avg_Zscore_K >= 2 | Avg_Zscore_K <= -2)),
    list(name = "_ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv",
      filter = filter(interaction_scores, Z_lm_L >= 2 & Avg_Zscore_L <= -2)),
    list(name = "_ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv",
      filter = filter(interaction_scores, Z_lm_L <= -2 & Avg_Zscore_L >= 2)),
    list(name = "_ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv",
      filter = filter(interaction_scores, Z_lm_K <= -2 & Avg_Zscore_K >= 2)),
    list(name = "_ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv",
      filter = filter(interaction_scores, Z_lm_K >= 2 & Avg_Zscore_K <= -2)),
    list(name = "_ZScores_Interaction_DeletionEnhancers_L_lm.csv",
      filter = filter(interaction_scores, Z_lm_L >= 2)),
    list(name = "_ZScores_Interaction_DeletionEnhancers_K_lm.csv",
      filter = filter(interaction_scores, Z_lm_K <= -2)),
    list(name = "_ZScores_Interaction_DeletionSuppressors_L_lm.csv",
      filter = filter(interaction_scores, Z_lm_L <= -2)),
    list(name = "_ZScores_Interaction_DeletionSuppressors_K_lm.csv",
      filter = filter(interaction_scores, Z_lm_K >= 2)),
    list(name = "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv",
      filter = filter(interaction_scores, Z_lm_L >= 2 | Z_lm_L <= -2)),
    list(name = "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv",
      filter = filter(interaction_scores, Z_lm_K >= 2 | Z_lm_K <= -2))
  )

  # Iterate over each filter and save the corresponding CSV file
  for (item in filters) {
    file_name <- paste0(prefix, item$name)
    write.csv(item$filter, file = file.path(out_dir, file_name), row.names = FALSE)
  }
}
main <- function() {
# Applying to all experiments
lapply(names(args$experiments), function(exp_name) {
@@ -578,55 +597,25 @@ main <- function() {
df_reference_strains <- df_reference_strains %>%
mutate(OrfRep = paste(OrfRep, Gene, num, sep = "_"))
num_genes <- length(unique(df_reference_strains$OrfRep))
print(num_genes)
# num_genes <- length(unique(df_reference_strains$OrfRep))
# print(num_genes)
# TODO: Is this necessary?
interaction_scores_reference <- df_reference_strains %>%
distinct(OrfRep) %>%
mutate(
Gene = NA,
raw_shift_l = NA, z_shift_l = NA, lm_score_l = NA, z_lm_l = NA,
r_squared_l = NA, sum_z_score_l = NA, avg_zscore_l = NA,
raw_shift_k = NA, z_shift_k = NA, lm_score_k = NA, z_lm_k = NA,
r_squared_k = NA, sum_z_score_k = NA, avg_zscore_k = NA,
raw_shift_r = NA, z_shift_r = NA, lm_score_r = NA, z_lm_r = NA,
r_squared_r = NA, sum_z_score_r = NA, avg_zscore_r = NA,
raw_shift_auc = NA, z_shift_auc = NA, lm_score_auc = NA, z_lm_auc = NA,
r_squared_auc = NA, sum_z_score_auc = NA, avg_zscore_auc = NA,
NG = NA, SM = NA
)
interaction_scores_reference <- calculate_interaction_scores(
interaction_scores_reference, df_reference_strains, df_stats_by_l, df_stats_by_k,
df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, out_dir, is_reference = TRUE)
variables <- c("L", "K", "r", "AUC")
interaction_scores_reference <- calculate_interaction_scores(df_reference_strains, df_stats_by_l,
df_stats_by_k, df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, out_dir)
interaction_scores_deletion <- calculate_interaction_scores(df_deletion_strains, df_stats_by_l,
df_stats_by_k, df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, out_dir)
write.csv(interaction_scores_reference, file = file.path(out_dir, "RF_ZScores_Interaction.csv"), row.names = FALSE)
interaction_scores_deletion <- df_deletion_strains %>%
distinct(OrfRep) %>%
mutate(
Gene = NA,
raw_shift_l = NA, z_shift_l = NA, lm_score_l = NA, z_lm_l = NA,
r_squared_l = NA, sum_z_score_l = NA, avg_zscore_l = NA,
raw_shift_k = NA, z_shift_k = NA, lm_score_k = NA, z_lm_k = NA,
r_squared_k = NA, sum_z_score_k = NA, avg_zscore_k = NA,
raw_shift_r = NA, z_shift_r = NA, lm_score_r = NA, z_lm_r = NA,
r_squared_r = NA, sum_z_score_r = NA, avg_zscore_r = NA,
raw_shift_auc = NA, z_shift_auc = NA, lm_score_auc = NA, z_lm_auc = NA,
r_squared_auc = NA, sum_z_score_auc = NA, avg_zscore_auc = NA,
NG = NA, DB = NA, SM = NA
)
result <- calculate_interaction_scores(
interaction_scores_deletion, df_deletion_strains, df_stats_by_l, df_stats_by_k,
df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, out_dir, is_reference = FALSE)
interaction_scores_deletion <- result$interaction_scores
df_stats_interaction_all <- result$df_stats_interaction_all
write.csv(interaction_scores_deletion, file = file.path(out_dir, "ZScores_Interaction.csv"), row.names = FALSE)
write.csv(df_stats_interaction_all, file = file.path(out_dir, "RF_ZScore_Calculations.csv"), row.names = FALSE)
# write.csv(interaction_scores_deletion_all, file = file.path(out_dir, "RF_ZScores_Calculations.csv"), row.names = FALSE)
# Filter and save interaction scores for reference strains
filter_and_save_interaction_scores(interaction_scores_reference, out_dir, "RF")