Auto-commit: apps/r/calculate_interaction_zscores5.R

This commit is contained in:
2024-09-01 02:31:49 -04:00
parent 9c48b25b42
commit ed1e3cf756

View File

@@ -285,7 +285,6 @@ process_strains <- function(df, df_stats_by_l_within_2sd_k, strain, output_dir)
}
df_strains <- bind_rows(df, df_temp)
}
return(df_strains)
}
@@ -426,6 +425,81 @@ generate_cpp_correlation_plots <- function(df_na_rm, lm_list, output_dir) {
save_plots("Correlation_CPPs", plot_list, output_dir)
}
#' Fill missing z-scores and rank every growth metric
#'
#' Replaces `NA` with 0.001 in the eight z-score columns (`Avg_Zscore_*` and
#' `Z_lm_*` for L, K, r and AUC), then adds one `rank()` column per z-score
#' column: `<metric>_Rank` for the `Avg_Zscore_*` columns and
#' `<metric>_Rank_lm` for the `Z_lm_*` columns.
#'
#' @param df Data frame containing the eight z-score columns.
#' @return `df` with the z-score columns filled in place and the eight rank
#'   columns added.
adjust_missing_and_rank <- function(df) {
  metrics <- c("L", "K", "r", "AUC")
  avg_cols <- paste0("Avg_Zscore_", metrics)
  lm_cols <- paste0("Z_lm_", metrics)

  # Placeholder substituted for every missing z-score before ranking
  fill_missing <- function(scores) ifelse(is.na(scores), 0.001, scores)

  df %>%
    mutate(
      across(all_of(c(avg_cols, lm_cols)), fill_missing),
      # Avg_Zscore_L -> L_Rank, Avg_Zscore_K -> K_Rank, ...
      across(
        all_of(avg_cols),
        rank,
        .names = "{sub('^Avg_Zscore_', '', .col)}_Rank"
      ),
      # Z_lm_L -> L_Rank_lm, Z_lm_K -> K_Rank_lm, ...
      across(
        all_of(lm_cols),
        rank,
        .names = "{sub('^Z_lm_', '', .col)}_Rank_lm"
      )
    )
}
#' Build the ranked z-score plot for a single metric
#'
#' Plots `zscore_var` against `rank_var` as small crosses, shades the regions
#' beyond +/- `sd_threshold` and draws horizontal lines at both thresholds.
#'
#' @param df Data frame holding the rank and z-score columns.
#' @param rank_var Column mapped to the x axis (captured with `{{ }}`).
#' @param zscore_var Column mapped to the y axis (captured with `{{ }}`).
#' @param sd_threshold Threshold used for the shaded bands, the horizontal
#'   lines and the title.
#' @param title_prefix Metric label used in the title and the y-axis label.
#' @return A ggplot object.
generate_ranked_plot <- function(df, rank_var, zscore_var, sd_threshold, title_prefix) {
  plot_title <- paste(title_prefix, "above", sd_threshold, "SD")
  y_label <- paste("Avg Z score", title_prefix)

  # Shaded band above +sd_threshold
  upper_band <- annotate(
    "rect",
    xmin = -Inf, xmax = Inf, ymin = sd_threshold, ymax = Inf,
    fill = "#542788", alpha = 0.3
  )
  # Shaded band below -sd_threshold
  lower_band <- annotate(
    "rect",
    xmin = -Inf, xmax = Inf, ymin = -sd_threshold, ymax = -Inf,
    fill = "orange", alpha = 0.3
  )
  threshold_lines <- geom_hline(yintercept = c(-sd_threshold, sd_threshold))

  ggplot(df, aes(x = {{ rank_var }}, y = {{ zscore_var }})) +
    labs(title = plot_title, x = "Rank", y = y_label) +
    upper_band +
    lower_band +
    threshold_lines +
    geom_point(size = 0.1, shape = 3) +
    theme_publication()
}
#' Write the ranked plots for every metric and SD threshold to one PDF
#'
#' Opens `<output_dir>/<prefix>.pdf` and prints one page per combination of
#' SD threshold (1, 2, 3) and rank/z-score column pair listed in
#' `rank_metrics`, thresholds in the outer loop.
#'
#' @param df Data frame containing every rank and z-score column named in
#'   `rank_metrics`.
#' @param output_dir Directory the PDF is written to.
#' @param prefix File name of the PDF, without the `.pdf` extension.
#' @return The value returned by `dev.off()` when the PDF device is closed.
generate_and_save_ranked_plots <- function(df, output_dir, prefix) {
  rank_metrics <- list(
    list(rank = "L_Rank", zscore = "Avg_Zscore_L", label = "L"),
    list(rank = "K_Rank", zscore = "Avg_Zscore_K", label = "K"),
    list(rank = "r_Rank", zscore = "Avg_Zscore_r", label = "r"),
    list(rank = "AUC_Rank", zscore = "Avg_Zscore_AUC", label = "AUC"),
    list(rank = "L_Rank_lm", zscore = "Z_lm_L", label = "L"),
    list(rank = "K_Rank_lm", zscore = "Z_lm_K", label = "K"),
    list(rank = "r_Rank_lm", zscore = "Z_lm_r", label = "r"),
    list(rank = "AUC_Rank_lm", zscore = "Z_lm_AUC", label = "AUC")
  )

  pdf(file.path(output_dir, paste0(prefix, ".pdf")), width = 18, height = 12, onefile = TRUE)
  pdf_device <- dev.cur()
  # If a plot fails, still close this device so it does not stay open and
  # capture later graphics output. On the normal path the device is already
  # closed by the final dev.off() below and this guard does nothing.
  on.exit(if (pdf_device %in% dev.list()) dev.off(pdf_device), add = TRUE)

  for (sd_threshold in c(1, 2, 3)) {
    for (metric in rank_metrics) {
      # generate_ranked_plot() captures its column arguments with `{{ }}`, so
      # the symbols must be injected with `!!`. Passing a plain `sym(...)` call
      # would make the aesthetic evaluate to a symbol object instead of the
      # data column.
      ranked_plot <- generate_ranked_plot(
        df,
        !!sym(metric[["rank"]]),
        !!sym(metric[["zscore"]]),
        sd_threshold,
        metric[["label"]]
      )
      print(ranked_plot)
    }
  }

  dev.off(pdf_device)
}
#' Create and save every ranked-plot PDF
#'
#' Fills missing z-scores and computes ranks once via
#' `adjust_missing_and_rank()`, then writes four PDFs into `output_dir`.
#'
#' @param df Data frame of interaction z-scores.
#' @param output_dir Directory the PDFs are written to.
#' @return The value returned by the last `generate_and_save_ranked_plots()`
#'   call.
create_ranked_plots <- function(df, output_dir) {
  ranked_df <- adjust_missing_and_rank(df)

  # NOTE(review): all four files are produced from the same data frame by the
  # same plotting routine, so their contents are identical. The "_lm" and
  # "_naRM" names suggest different inputs were intended -- confirm.
  save_ranked <- function(prefix) {
    generate_and_save_ranked_plots(ranked_df, output_dir, prefix)
  }

  save_ranked("RankPlots")
  save_ranked("RankPlots_lm")
  save_ranked("RankPlots_naRM")
  save_ranked("RankPlots_lm_naRM")
}
main <- function() {
# Applying to all experiments
lapply(names(args$experiments), function(exp_name) {
@@ -642,7 +716,12 @@ main <- function() {
lm(Z_lm_AUC ~ Z_lm_K, data = zscores_interactions_filtered),
lm(Z_lm_AUC ~ Z_lm_r, data = zscores_interactions_filtered)
)
# Generate cpp correlation plots
generate_cpp_correlation_plots(zscores_interactions_filtered, lm_list, out_dir)
# Generate ranked plots
create_ranked_plots(zscores_interactions_filtered, out_dir)
})
})
}