Auto-commit: apps/r/calculate_interaction_zscores5.R
This commit is contained in:
@@ -289,9 +289,30 @@ process_strains <- function(df, df_stats_by_l_within_2sd_k, strain, output_dir)
|
|||||||
return(df_strains)
|
return(df_strains)
|
||||||
}
|
}
|
||||||
|
|
||||||
calculate_interaction_scores <- function(interaction_scores, df, df_stats_by_l, df_stats_by_k,
|
calculate_interaction_scores <- function(df, df_stats_by_l, df_stats_by_k,
|
||||||
df_stats_by_r, df_stats_by_auc, background_means,
|
df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, out_dir) {
|
||||||
max_conc, variables, out_dir, is_reference = TRUE) {
|
|
||||||
|
# Initialize the dataframe
|
||||||
|
interaction_scores <- df %>%
|
||||||
|
distinct(OrfRep) %>%
|
||||||
|
mutate(
|
||||||
|
Gene = NA,
|
||||||
|
raw_shift_l = NA, z_shift_l = NA, lm_score_l = NA, z_lm_l = NA,
|
||||||
|
r_squared_l = NA, sum_z_score_l = NA, avg_zscore_l = NA,
|
||||||
|
raw_shift_k = NA, z_shift_k = NA, lm_score_k = NA, z_lm_k = NA,
|
||||||
|
r_squared_k = NA, sum_z_score_k = NA, avg_zscore_k = NA,
|
||||||
|
raw_shift_r = NA, z_shift_r = NA, lm_score_r = NA, z_lm_r = NA,
|
||||||
|
r_squared_r = NA, sum_z_score_r = NA, avg_zscore_r = NA,
|
||||||
|
raw_shift_auc = NA, z_shift_auc = NA, lm_score_auc = NA, z_lm_auc = NA,
|
||||||
|
r_squared_auc = NA, sum_z_score_auc = NA, avg_zscore_auc = NA,
|
||||||
|
NG = NA, DB = NA, SM = NA
|
||||||
|
)
|
||||||
|
|
||||||
|
# The list to hold all of the combined interactions
|
||||||
|
interaction_scores_list <- list()
|
||||||
|
|
||||||
|
# TODO is this necessary?
|
||||||
|
# Initialize the dataframe
|
||||||
interaction_scores <- interaction_scores %>%
|
interaction_scores <- interaction_scores %>%
|
||||||
mutate(across(.cols = everything(), ~ NA)) # Initialize columns with NA
|
mutate(across(.cols = everything(), ~ NA)) # Initialize columns with NA
|
||||||
|
|
||||||
@@ -385,67 +406,65 @@ calculate_interaction_scores <- function(interaction_scores, df, df_stats_by_l,
|
|||||||
SM = sum(df_stats_interaction$SM, na.rm = TRUE)
|
SM = sum(df_stats_interaction$SM, na.rm = TRUE)
|
||||||
)
|
)
|
||||||
|
|
||||||
# If deletion strain, append to the final interaction statistics
|
interaction_scores_list[[i]] <- interaction_scores
|
||||||
if (!is_reference) {
|
|
||||||
if (i == 1) {
|
|
||||||
df_stats_interaction_all <- df_stats_interaction
|
|
||||||
} else {
|
|
||||||
df_stats_interaction_all <- bind_rows(df_stats_interaction_all, df_stats_interaction)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# If deletion strain, return the final interaction statistics along with the scores
|
return(interaction_scores_list)
|
||||||
if (!is_reference) {
|
|
||||||
return(list(interaction_scores = interaction_scores, df_stats_interaction_all = df_stats_interaction_all))
|
|
||||||
}
|
|
||||||
|
|
||||||
return(interaction_scores)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
filter_and_save_interaction_scores <- function(interaction_scores, out_dir, prefix) {
|
filter_and_save_interaction_scores <- function(interaction_scores, out_dir, prefix) {
|
||||||
|
|
||||||
|
# Arrange the interaction scores by Z_lm_L and NG
|
||||||
interaction_scores <- interaction_scores %>%
|
interaction_scores <- interaction_scores %>%
|
||||||
arrange(desc(Z_lm_L)) %>%
|
arrange(desc(Z_lm_L), desc(NG))
|
||||||
arrange(desc(NG))
|
|
||||||
|
|
||||||
output_files <- list(
|
filters <- list(
|
||||||
paste0(prefix, "_ZScores_Interaction.csv") = interaction_scores,
|
list(name = "_ZScores_Interaction.csv",
|
||||||
paste0(prefix, "_ZScores_Interaction_DeletionEnhancers_L.csv") = filter(interaction_scores, Avg_Zscore_L >= 2),
|
filter = interaction_scores),
|
||||||
paste0(prefix, "_ZScores_Interaction_DeletionEnhancers_K.csv") = filter(interaction_scores, Avg_Zscore_K <= -2),
|
list(name = "_ZScores_Interaction_DeletionEnhancers_L.csv",
|
||||||
paste0(prefix, "_ZScores_Interaction_DeletionSuppressors_L.csv") = filter(interaction_scores, Avg_Zscore_L <= -2),
|
filter = filter(interaction_scores, Avg_Zscore_L >= 2)),
|
||||||
paste0(prefix, "_ZScores_Interaction_DeletionSuppressors_K.csv") = filter(interaction_scores, Avg_Zscore_K >= 2),
|
list(name = "_ZScores_Interaction_DeletionEnhancers_K.csv",
|
||||||
paste0(prefix, "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv") = filter(interaction_scores, Avg_Zscore_L >= 2 | Avg_Zscore_L <= -2),
|
filter = filter(interaction_scores, Avg_Zscore_K <= -2)),
|
||||||
paste0(prefix, "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv") = filter(interaction_scores, Avg_Zscore_K >= 2 | Avg_Zscore_K <= -2),
|
list(name = "_ZScores_Interaction_DeletionSuppressors_L.csv",
|
||||||
paste0(prefix, "_ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv") = filter(interaction_scores, Z_lm_L >= 2 & Avg_Zscore_L <= -2),
|
filter = filter(interaction_scores, Avg_Zscore_L <= -2)),
|
||||||
paste0(prefix, "_ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv") = filter(interaction_scores, Z_lm_L <= -2 & Avg_Zscore_L >= 2),
|
list(name = "_ZScores_Interaction_DeletionSuppressors_K.csv",
|
||||||
paste0(prefix, "_ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv") = filter(interaction_scores, Z_lm_K <= -2 & Avg_Zscore_K >= 2),
|
filter = filter(interaction_scores, Avg_Zscore_K >= 2)),
|
||||||
paste0(prefix, "_ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv") = filter(interaction_scores, Z_lm_K >= 2 & Avg_Zscore_K <= -2)
|
list(name = "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv",
|
||||||
|
filter = filter(interaction_scores, Avg_Zscore_L >= 2 | Avg_Zscore_L <= -2)),
|
||||||
|
list(name = "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv",
|
||||||
|
filter = filter(interaction_scores, Avg_Zscore_K >= 2 | Avg_Zscore_K <= -2)),
|
||||||
|
list(name = "_ZScores_Interaction_Suppressors_and_lm_Enhancers_L.csv",
|
||||||
|
filter = filter(interaction_scores, Z_lm_L >= 2 & Avg_Zscore_L <= -2)),
|
||||||
|
list(name = "_ZScores_Interaction_Enhancers_and_lm_Suppressors_L.csv",
|
||||||
|
filter = filter(interaction_scores, Z_lm_L <= -2 & Avg_Zscore_L >= 2)),
|
||||||
|
list(name = "_ZScores_Interaction_Suppressors_and_lm_Enhancers_K.csv",
|
||||||
|
filter = filter(interaction_scores, Z_lm_K <= -2 & Avg_Zscore_K >= 2)),
|
||||||
|
list(name = "_ZScores_Interaction_Enhancers_and_lm_Suppressors_K.csv",
|
||||||
|
filter = filter(interaction_scores, Z_lm_K >= 2 & Avg_Zscore_K <= -2)),
|
||||||
|
list(name = "_ZScores_Interaction_DeletionEnhancers_L_lm.csv",
|
||||||
|
filter = filter(interaction_scores, Z_lm_L >= 2)),
|
||||||
|
list(name = "_ZScores_Interaction_DeletionEnhancers_K_lm.csv",
|
||||||
|
filter = filter(interaction_scores, Z_lm_K <= -2)),
|
||||||
|
list(name = "_ZScores_Interaction_DeletionSuppressors_L_lm.csv",
|
||||||
|
filter = filter(interaction_scores, Z_lm_L <= -2)),
|
||||||
|
list(name = "_ZScores_Interaction_DeletionSuppressors_K_lm.csv",
|
||||||
|
filter = filter(interaction_scores, Z_lm_K >= 2)),
|
||||||
|
list(name = "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv",
|
||||||
|
filter = filter(interaction_scores, Z_lm_L >= 2 | Z_lm_L <= -2)),
|
||||||
|
list(name = "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv",
|
||||||
|
filter = filter(interaction_scores, Z_lm_K >= 2 | Z_lm_K <= -2))
|
||||||
)
|
)
|
||||||
|
|
||||||
output_files_lm <- list(
|
# Iterate over each filter and save the corresponding CSV file
|
||||||
paste0(prefix, "_ZScores_Interaction_DeletionEnhancers_L_lm.csv") = filter(interaction_scores, Z_lm_L >= 2),
|
for (item in filters) {
|
||||||
paste0(prefix, "_ZScores_Interaction_DeletionEnhancers_K_lm.csv") = filter(interaction_scores, Z_lm_K <= -2),
|
file_name <- paste0(prefix, item$name)
|
||||||
paste0(prefix, "_ZScores_Interaction_DeletionSuppressors_L_lm.csv") = filter(interaction_scores, Z_lm_L <= -2),
|
write.csv(item$filter, file = file.path(out_dir, file_name), row.names = FALSE)
|
||||||
paste0(prefix, "_ZScores_Interaction_DeletionSuppressors_K_lm.csv") = filter(interaction_scores, Z_lm_K >= 2),
|
|
||||||
paste0(prefix, "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_L_lm.csv") = filter(interaction_scores, Z_lm_L >= 2 | Z_lm_L <= -2),
|
|
||||||
paste0(prefix, "_ZScores_Interaction_DeletionEnhancers_and_Suppressors_K_lm.csv") = filter(interaction_scores, Z_lm_K >= 2 | Z_lm_K <= -2)
|
|
||||||
)
|
|
||||||
|
|
||||||
for (file_name in names(output_files)) {
|
|
||||||
write.csv(output_files[[file_name]], file = file.path(out_dir, file_name), row.names = FALSE)
|
|
||||||
}
|
|
||||||
|
|
||||||
for (file_name in names(output_files_lm)) {
|
|
||||||
write.csv(output_files_lm[[file_name]], file = file.path(out_dir, file_name), row.names = FALSE)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
main <- function() {
|
main <- function() {
|
||||||
# Applying to all experiments
|
# Applying to all experiments
|
||||||
lapply(names(args$experiments), function(exp_name) {
|
lapply(names(args$experiments), function(exp_name) {
|
||||||
@@ -578,55 +597,25 @@ main <- function() {
|
|||||||
df_reference_strains <- df_reference_strains %>%
|
df_reference_strains <- df_reference_strains %>%
|
||||||
mutate(OrfRep = paste(OrfRep, Gene, num, sep = "_"))
|
mutate(OrfRep = paste(OrfRep, Gene, num, sep = "_"))
|
||||||
|
|
||||||
num_genes <- length(unique(df_reference_strains$OrfRep))
|
# num_genes <- length(unique(df_reference_strains$OrfRep))
|
||||||
print(num_genes)
|
# print(num_genes)
|
||||||
|
|
||||||
# TODO: Is this necessary?
|
variables <- c("L", "K", "r", "AUC")
|
||||||
interaction_scores_reference <- df_reference_strains %>%
|
interaction_scores_reference <- calculate_interaction_scores(df_reference_strains, df_stats_by_l,
|
||||||
distinct(OrfRep) %>%
|
df_stats_by_k, df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, out_dir)
|
||||||
mutate(
|
interaction_scores_deletion <- calculate_interaction_scores(df_deletion_strains, df_stats_by_l,
|
||||||
Gene = NA,
|
df_stats_by_k, df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, out_dir)
|
||||||
raw_shift_l = NA, z_shift_l = NA, lm_score_l = NA, z_lm_l = NA,
|
|
||||||
r_squared_l = NA, sum_z_score_l = NA, avg_zscore_l = NA,
|
|
||||||
raw_shift_k = NA, z_shift_k = NA, lm_score_k = NA, z_lm_k = NA,
|
|
||||||
r_squared_k = NA, sum_z_score_k = NA, avg_zscore_k = NA,
|
|
||||||
raw_shift_r = NA, z_shift_r = NA, lm_score_r = NA, z_lm_r = NA,
|
|
||||||
r_squared_r = NA, sum_z_score_r = NA, avg_zscore_r = NA,
|
|
||||||
raw_shift_auc = NA, z_shift_auc = NA, lm_score_auc = NA, z_lm_auc = NA,
|
|
||||||
r_squared_auc = NA, sum_z_score_auc = NA, avg_zscore_auc = NA,
|
|
||||||
NG = NA, SM = NA
|
|
||||||
)
|
|
||||||
|
|
||||||
interaction_scores_reference <- calculate_interaction_scores(
|
|
||||||
interaction_scores_reference, df_reference_strains, df_stats_by_l, df_stats_by_k,
|
|
||||||
df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, out_dir, is_reference = TRUE)
|
|
||||||
|
|
||||||
write.csv(interaction_scores_reference, file = file.path(out_dir, "RF_ZScores_Interaction.csv"), row.names = FALSE)
|
write.csv(interaction_scores_reference, file = file.path(out_dir, "RF_ZScores_Interaction.csv"), row.names = FALSE)
|
||||||
|
|
||||||
interaction_scores_deletion <- df_deletion_strains %>%
|
|
||||||
distinct(OrfRep) %>%
|
|
||||||
mutate(
|
|
||||||
Gene = NA,
|
|
||||||
raw_shift_l = NA, z_shift_l = NA, lm_score_l = NA, z_lm_l = NA,
|
|
||||||
r_squared_l = NA, sum_z_score_l = NA, avg_zscore_l = NA,
|
|
||||||
raw_shift_k = NA, z_shift_k = NA, lm_score_k = NA, z_lm_k = NA,
|
|
||||||
r_squared_k = NA, sum_z_score_k = NA, avg_zscore_k = NA,
|
|
||||||
raw_shift_r = NA, z_shift_r = NA, lm_score_r = NA, z_lm_r = NA,
|
|
||||||
r_squared_r = NA, sum_z_score_r = NA, avg_zscore_r = NA,
|
|
||||||
raw_shift_auc = NA, z_shift_auc = NA, lm_score_auc = NA, z_lm_auc = NA,
|
|
||||||
r_squared_auc = NA, sum_z_score_auc = NA, avg_zscore_auc = NA,
|
|
||||||
NG = NA, DB = NA, SM = NA
|
|
||||||
)
|
|
||||||
|
|
||||||
result <- calculate_interaction_scores(
|
|
||||||
interaction_scores_deletion, df_deletion_strains, df_stats_by_l, df_stats_by_k,
|
|
||||||
df_stats_by_r, df_stats_by_auc, background_means, max_conc, variables, out_dir, is_reference = FALSE)
|
|
||||||
|
|
||||||
interaction_scores_deletion <- result$interaction_scores
|
|
||||||
df_stats_interaction_all <- result$df_stats_interaction_all
|
|
||||||
|
|
||||||
write.csv(interaction_scores_deletion, file = file.path(out_dir, "ZScores_Interaction.csv"), row.names = FALSE)
|
write.csv(interaction_scores_deletion, file = file.path(out_dir, "ZScores_Interaction.csv"), row.names = FALSE)
|
||||||
write.csv(df_stats_interaction_all, file = file.path(out_dir, "RF_ZScore_Calculations.csv"), row.names = FALSE)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# write.csv(interaction_scores_deletion_all, file = file.path(out_dir, "RF_ZScores_Calculations.csv"), row.names = FALSE)
|
||||||
|
|
||||||
# Filter and save interaction scores for reference strains
|
# Filter and save interaction scores for reference strains
|
||||||
filter_and_save_interaction_scores(interaction_scores_reference, out_dir, "RF")
|
filter_and_save_interaction_scores(interaction_scores_reference, out_dir, "RF")
|
||||||
|
|||||||
Reference in New Issue
Block a user