Auto-commit: apps/r/calculate_interaction_zscores5.R

This commit is contained in:
2024-09-01 02:31:49 -04:00
parent 9c48b25b42
commit ed1e3cf756

View File

@@ -285,7 +285,6 @@ process_strains <- function(df, df_stats_by_l_within_2sd_k, strain, output_dir)
} }
df_strains <- bind_rows(df, df_temp) df_strains <- bind_rows(df, df_temp)
} }
return(df_strains) return(df_strains)
} }
@@ -412,7 +411,7 @@ generate_summary_plots <- function(df, output_dir) {
generate_cpp_correlation_plots <- function(df_na_rm, lm_list, output_dir) { generate_cpp_correlation_plots <- function(df_na_rm, lm_list, output_dir) {
lm_summaries <- lapply(lm_list, summary) lm_summaries <- lapply(lm_list, summary)
plot_titles <- c("Interaction L vs. Interaction K", "Interaction L vs. Interaction r", "Interaction L vs. Interaction AUC", plot_titles <- c("Interaction L vs. Interaction K", "Interaction L vs. Interaction r", "Interaction L vs. Interaction AUC",
"Interaction K vs. Interaction r", "Interaction K vs. Interaction AUC", "Interaction r vs. Interaction AUC") "Interaction K vs. Interaction r", "Interaction K vs. Interaction AUC", "Interaction r vs. Interaction AUC")
plot_list <- lapply(seq_along(lm_list), function(i) { plot_list <- lapply(seq_along(lm_list), function(i) {
ggplot(df_na_rm, aes_string(x = names(lm_list)[i][1], y = names(lm_list)[i][2])) + ggplot(df_na_rm, aes_string(x = names(lm_list)[i][1], y = names(lm_list)[i][2])) +
@@ -426,6 +425,81 @@ generate_cpp_correlation_plots <- function(df_na_rm, lm_list, output_dir) {
save_plots("Correlation_CPPs", plot_list, output_dir) save_plots("Correlation_CPPs", plot_list, output_dir)
} }
# Fill missing z-scores with a placeholder and add one rank column per metric.
#
# Args:
#   df: data frame containing Avg_Zscore_{L,K,r,AUC} and Z_lm_{L,K,r,AUC}.
#
# Returns:
#   df with NA values in those eight columns replaced by 0.001, plus the rank
#   columns L_Rank, K_Rank, r_Rank, AUC_Rank (ranks of the Avg_Zscore_*
#   columns) and L_Rank_lm, K_Rank_lm, r_Rank_lm, AUC_Rank_lm (ranks of the
#   Z_lm_* columns).
adjust_missing_and_rank <- function(df) {
  zscore_cols <- c(
    "Avg_Zscore_L", "Avg_Zscore_K", "Avg_Zscore_r", "Avg_Zscore_AUC",
    "Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC"
  )
  fill_missing <- function(values) ifelse(is.na(values), 0.001, values)

  df %>%
    mutate(
      # Fill first: the rank() calls below see the already-filled columns.
      across(all_of(zscore_cols), fill_missing),
      L_Rank = rank(Avg_Zscore_L),
      K_Rank = rank(Avg_Zscore_K),
      r_Rank = rank(Avg_Zscore_r),
      AUC_Rank = rank(Avg_Zscore_AUC),
      L_Rank_lm = rank(Z_lm_L),
      K_Rank_lm = rank(Z_lm_K),
      r_Rank_lm = rank(Z_lm_r),
      AUC_Rank_lm = rank(Z_lm_AUC)
    )
}
# Build a rank-vs-z-score scatter plot with the +/- SD threshold bands shaded.
#
# Args:
#   df: data frame holding the columns to plot.
#   rank_var: column for the x axis; embraced with {{ }}, so pass a bare
#     column name (or inject one with `!!`).
#   zscore_var: column for the y axis; embraced the same way as rank_var.
#   sd_threshold: numeric cut-off; the region above +sd_threshold and the
#     region below -sd_threshold are shaded, and both cut-offs get a line.
#   title_prefix: label used in the plot title and the y-axis title.
#
# Returns:
#   A ggplot object (nothing is drawn or saved here).
generate_ranked_plot <- function(df, rank_var, zscore_var, sd_threshold, title_prefix) {
  plot_title <- paste(title_prefix, "above", sd_threshold, "SD")
  y_axis_title <- paste("Avg Z score", title_prefix)

  upper_band <- annotate(
    "rect",
    xmin = -Inf, xmax = Inf, ymin = sd_threshold, ymax = Inf,
    fill = "#542788", alpha = 0.3
  )
  lower_band <- annotate(
    "rect",
    xmin = -Inf, xmax = Inf, ymin = -sd_threshold, ymax = -Inf,
    fill = "orange", alpha = 0.3
  )

  # Layer order matters for drawing: bands first, then lines, then points.
  ggplot(df, aes(x = {{ rank_var }}, y = {{ zscore_var }})) +
    upper_band +
    lower_band +
    geom_hline(yintercept = c(-sd_threshold, sd_threshold)) +
    geom_point(size = 0.1, shape = 3) +
    labs(title = plot_title, x = "Rank", y = y_axis_title) +
    theme_publication()
}
# Render every ranked z-score plot into one multi-page PDF.
#
# Args:
#   df: data frame containing the rank and z-score columns named in
#     `rank_metrics` below.
#   output_dir: directory that receives the PDF.
#   prefix: file name of the PDF, without the ".pdf" extension.
#
# Side effects:
#   Writes file.path(output_dir, paste0(prefix, ".pdf")), printing one plot
#   per (SD threshold, metric) combination: thresholds 1, 2 and 3 for each of
#   the eight metrics.
#
# Returns:
#   The value of dev.off() for the PDF device.
generate_and_save_ranked_plots <- function(df, output_dir, prefix) {
  # Each entry: rank column, z-score column, label used in titles.
  rank_metrics <- list(
    list("L_Rank", "Avg_Zscore_L", "L"),
    list("K_Rank", "Avg_Zscore_K", "K"),
    list("r_Rank", "Avg_Zscore_r", "r"),
    list("AUC_Rank", "Avg_Zscore_AUC", "AUC"),
    list("L_Rank_lm", "Z_lm_L", "L"),
    list("K_Rank_lm", "Z_lm_K", "K"),
    list("r_Rank_lm", "Z_lm_r", "r"),
    list("AUC_Rank_lm", "Z_lm_AUC", "AUC")
  )

  pdf(file.path(output_dir, paste0(prefix, ".pdf")), width = 18, height = 12, onefile = TRUE)
  pdf_device <- dev.cur()
  # Close the PDF device even when a plot fails, so an error does not leave
  # the device open for the rest of the session. The membership check keeps
  # this from closing anything after the normal dev.off() below has run.
  on.exit(if (pdf_device %in% dev.list()) dev.off(pdf_device), add = TRUE)

  for (sd_threshold in c(1, 2, 3)) {
    for (metric in rank_metrics) {
      # generate_ranked_plot() embraces its column arguments with {{ }}, so the
      # symbols must be injected with `!!`. Passing sym(...) un-injected makes
      # the aesthetic evaluate to a symbol instead of the column's values.
      ranked_plot <- generate_ranked_plot(
        df,
        !!sym(metric[[1]]),
        !!sym(metric[[2]]),
        sd_threshold,
        metric[[3]]
      )
      print(ranked_plot)
    }
  }

  dev.off(pdf_device)
}
# Rank the z-scores in df and write the four ranked-plot PDFs to output_dir.
#
# Args:
#   df: data frame accepted by adjust_missing_and_rank().
#   output_dir: directory passed through to generate_and_save_ranked_plots().
#
# Returns:
#   The result of the last generate_and_save_ranked_plots() call.
#
# NOTE(review): every call below receives the same adjusted data frame, so the
# four PDFs differ only in file name. The "_lm" / "_naRM" suffixes suggest they
# were meant to differ in content - confirm the intended inputs.
create_ranked_plots <- function(df, output_dir) {
  ranked_df <- adjust_missing_and_rank(df)

  pdf_prefixes <- c("RankPlots", "RankPlots_lm", "RankPlots_naRM", "RankPlots_lm_naRM")
  results <- lapply(pdf_prefixes, function(pdf_prefix) {
    generate_and_save_ranked_plots(ranked_df, output_dir, pdf_prefix)
  })

  results[[length(results)]]
}
main <- function() { main <- function() {
# Applying to all experiments # Applying to all experiments
lapply(names(args$experiments), function(exp_name) { lapply(names(args$experiments), function(exp_name) {
@@ -642,7 +716,12 @@ main <- function() {
lm(Z_lm_AUC ~ Z_lm_K, data = zscores_interactions_filtered), lm(Z_lm_AUC ~ Z_lm_K, data = zscores_interactions_filtered),
lm(Z_lm_AUC ~ Z_lm_r, data = zscores_interactions_filtered) lm(Z_lm_AUC ~ Z_lm_r, data = zscores_interactions_filtered)
) )
# Generate cpp correlation plots
generate_cpp_correlation_plots(zscores_interactions_filtered, lm_list, out_dir) generate_cpp_correlation_plots(zscores_interactions_filtered, lm_list, out_dir)
# Generate ranked plots
create_ranked_plots(zscores_interactions_filtered, out_dir)
}) })
}) })
} }