Add correlation plots
This commit is contained in:
@@ -530,60 +530,6 @@ generate_interaction_plot_configs <- function(df, variables) {
|
|||||||
return(plot_configs)
|
return(plot_configs)
|
||||||
}
|
}
|
||||||
|
|
||||||
generate_rank_plot_configs <- function(df, rank_var, zscore_var, label_prefix, is_lm = FALSE) {
|
|
||||||
configs <- list()
|
|
||||||
|
|
||||||
# Adjust titles for _lm plots if is_lm is TRUE
|
|
||||||
plot_title_prefix <- if (is_lm) "Interaction Z score vs. Rank for" else "Average Z score vs. Rank for"
|
|
||||||
|
|
||||||
for (sd_band in c(1, 2, 3)) {
|
|
||||||
# Annotated version (with text)
|
|
||||||
configs[[length(configs) + 1]] <- list(
|
|
||||||
df = df,
|
|
||||||
x_var = rank_var,
|
|
||||||
y_var = zscore_var,
|
|
||||||
plot_type = "rank",
|
|
||||||
title = paste(plot_title_prefix, label_prefix, "above", sd_band, "SD"),
|
|
||||||
sd_band = sd_band,
|
|
||||||
enhancer_label = list(
|
|
||||||
x = nrow(df) / 2, y = 10,
|
|
||||||
label = paste("Deletion Enhancers =", nrow(df[df[[zscore_var]] >= sd_band, ]))
|
|
||||||
),
|
|
||||||
suppressor_label = list(
|
|
||||||
x = nrow(df) / 2, y = -10,
|
|
||||||
label = paste("Deletion Suppressors =", nrow(df[df[[zscore_var]] <= -sd_band, ]))
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
# Non-annotated version (_notext)
|
|
||||||
configs[[length(configs) + 1]] <- list(
|
|
||||||
df = df,
|
|
||||||
x_var = rank_var,
|
|
||||||
y_var = zscore_var,
|
|
||||||
plot_type = "rank",
|
|
||||||
title = paste(plot_title_prefix, label_prefix, "above", sd_band, "SD"),
|
|
||||||
sd_band = sd_band,
|
|
||||||
enhancer_label = NULL, # No annotations for _notext
|
|
||||||
suppressor_label = NULL # No annotations for _notext
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
return(configs)
|
|
||||||
}
|
|
||||||
|
|
||||||
generate_correlation_plot_configs <- function(df, lm_list, lm_summaries) {
|
|
||||||
lapply(seq_along(lm_list), function(i) {
|
|
||||||
r_squared <- round(lm_summaries[[i]]$r.squared, 3)
|
|
||||||
list(
|
|
||||||
x_var = names(lm_list)[i][1],
|
|
||||||
y_var = names(lm_list)[i][2],
|
|
||||||
plot_type = "scatter",
|
|
||||||
title = paste("Correlation between", names(lm_list)[i][1], "and", names(lm_list)[i][2]),
|
|
||||||
annotations = list(list(x = 0, y = 0, label = paste("R-squared =", r_squared)))
|
|
||||||
)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
# Adjust missing values and calculate ranks
|
# Adjust missing values and calculate ranks
|
||||||
adjust_missing_and_rank <- function(df, variables) {
|
adjust_missing_and_rank <- function(df, variables) {
|
||||||
|
|
||||||
@@ -599,6 +545,71 @@ adjust_missing_and_rank <- function(df, variables) {
|
|||||||
return(df)
|
return(df)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
generate_rank_plot_configs <- function(df, rank_var, zscore_var, var, is_lm = FALSE) {
|
||||||
|
configs <- list()
|
||||||
|
|
||||||
|
# Adjust titles for _lm plots if is_lm is TRUE
|
||||||
|
plot_title_prefix <- if (is_lm) "Interaction Z score vs. Rank for" else "Average Z score vs. Rank for"
|
||||||
|
|
||||||
|
# Annotated version (with text)
|
||||||
|
for (sd_band in c(1, 2, 3)) {
|
||||||
|
configs[[length(configs) + 1]] <- list(
|
||||||
|
df = df,
|
||||||
|
x_var = rank_var,
|
||||||
|
y_var = zscore_var,
|
||||||
|
plot_type = "rank",
|
||||||
|
title = paste(plot_title_prefix, var, "above", sd_band, "SD"),
|
||||||
|
sd_band = sd_band,
|
||||||
|
enhancer_label = list(
|
||||||
|
x = nrow(df) / 2, y = 10,
|
||||||
|
label = paste("Deletion Enhancers =", nrow(df[df[[zscore_var]] >= sd_band, ]))
|
||||||
|
),
|
||||||
|
suppressor_label = list(
|
||||||
|
x = nrow(df) / 2, y = -10,
|
||||||
|
label = paste("Deletion Suppressors =", nrow(df[df[[zscore_var]] <= -sd_band, ]))
|
||||||
|
)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Non-annotated version (_notext)
|
||||||
|
for (sd_band in c(1, 2, 3)) {
|
||||||
|
configs[[length(configs) + 1]] <- list(
|
||||||
|
df = df,
|
||||||
|
x_var = rank_var,
|
||||||
|
y_var = zscore_var,
|
||||||
|
plot_type = "rank",
|
||||||
|
title = paste(plot_title_prefix, var, "above", sd_band, "SD"),
|
||||||
|
sd_band = sd_band,
|
||||||
|
enhancer_label = NULL, # No annotations for _notext
|
||||||
|
suppressor_label = NULL # No annotations for _notext
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
return(configs)
|
||||||
|
}
|
||||||
|
|
||||||
|
generate_correlation_plot_configs <- function(df, variables) {
|
||||||
|
plot_configs <- list()
|
||||||
|
|
||||||
|
for (variable in variables) {
|
||||||
|
z_lm_var <- paste0("Z_lm_", variable)
|
||||||
|
avg_zscore_var <- paste0("Avg_Zscore_", variable)
|
||||||
|
lm_r_squared_col <- paste0("lm_R_squared_", variable)
|
||||||
|
|
||||||
|
plot_configs[[length(plot_configs) + 1]] <- list(
|
||||||
|
df = df,
|
||||||
|
x_var = avg_zscore_var,
|
||||||
|
y_var = z_lm_var,
|
||||||
|
plot_type = "correlation",
|
||||||
|
title = paste("Avg Zscore vs lm", variable),
|
||||||
|
color_var = "Overlap",
|
||||||
|
correlation_text = paste("R-squared =", round(df[[lm_r_squared_col]][1], 2))
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
return(plot_configs)
|
||||||
|
}
|
||||||
|
|
||||||
main <- function() {
|
main <- function() {
|
||||||
lapply(names(args$experiments), function(exp_name) {
|
lapply(names(args$experiments), function(exp_name) {
|
||||||
exp <- args$experiments[[exp_name]]
|
exp <- args$experiments[[exp_name]]
|
||||||
@@ -674,10 +685,8 @@ main <- function() {
|
|||||||
write.csv(l_within_2sd_k_stats, file = file.path(out_dir_qc, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"), row.names = FALSE)
|
write.csv(l_within_2sd_k_stats, file = file.path(out_dir_qc, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"), row.names = FALSE)
|
||||||
write.csv(l_outside_2sd_k_stats, file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_outside_2sd_k.csv"), row.names = FALSE)
|
write.csv(l_outside_2sd_k_stats, file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_outside_2sd_k.csv"), row.names = FALSE)
|
||||||
|
|
||||||
# Plots
|
# Plot configurations
|
||||||
|
# Each plots list corresponds to a file
|
||||||
# Print quality control graphs before removing data due to contamination and
|
|
||||||
# adjusting missing data to max theoretical values
|
|
||||||
message("Generating QC plot configurations")
|
message("Generating QC plot configurations")
|
||||||
l_vs_k_plots <- list(
|
l_vs_k_plots <- list(
|
||||||
list(df = df, x_var = "L", y_var = "K", plot_type = "scatter",
|
list(df = df, x_var = "L", y_var = "K", plot_type = "scatter",
|
||||||
@@ -820,9 +829,9 @@ main <- function() {
|
|||||||
|
|
||||||
# Recalculate summary statistics for the background strain
|
# Recalculate summary statistics for the background strain
|
||||||
message("Calculating summary statistics for background strain")
|
message("Calculating summary statistics for background strain")
|
||||||
ss <- calculate_summary_stats(df_bg, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
|
ss_bg <- calculate_summary_stats(df_bg, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
|
||||||
summary_stats_bg <- ss$summary_stats
|
summary_stats_bg <- ss_bg$summary_stats
|
||||||
df_bg_stats <- ss$df_with_stats
|
# df_bg_stats <- ss_bg$df_with_stats
|
||||||
write.csv(summary_stats_bg,
|
write.csv(summary_stats_bg,
|
||||||
file = file.path(out_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")),
|
file = file.path(out_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")),
|
||||||
row.names = FALSE)
|
row.names = FALSE)
|
||||||
@@ -935,6 +944,7 @@ main <- function() {
|
|||||||
write.csv(suppressors_lm_K,
|
write.csv(suppressors_lm_K,
|
||||||
file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K_lm.csv"), row.names = FALSE)
|
file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K_lm.csv"), row.names = FALSE)
|
||||||
|
|
||||||
|
# TODO needs explanation
|
||||||
zscores_interactions_adjusted <- adjust_missing_and_rank(zscores_interactions)
|
zscores_interactions_adjusted <- adjust_missing_and_rank(zscores_interactions)
|
||||||
|
|
||||||
rank_plot_configs <- c(
|
rank_plot_configs <- c(
|
||||||
@@ -945,30 +955,56 @@ main <- function() {
|
|||||||
plot_configs = rank_plot_config, grid_layout = list(ncol = 3, nrow = 2))
|
plot_configs = rank_plot_config, grid_layout = list(ncol = 3, nrow = 2))
|
||||||
|
|
||||||
rank_lm_plot_config <- c(
|
rank_lm_plot_config <- c(
|
||||||
generate_rank_lm_plot_configs(zscores_interactions_adjusted, "Rank_lm_L", "Z_lm_L", "L"),
|
generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_lm_L", "Z_lm_L", "L", is_lm = TRUE),
|
||||||
generate_rank_lm_plot_configs(zscores_interactions_adjusted, "Rank_lm_K", "Z_lm_K", "K")
|
generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_lm_K", "Z_lm_K", "K", is_lm = TRUE)
|
||||||
)
|
)
|
||||||
generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots_lm",
|
generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots_lm",
|
||||||
plot_configs = rank_lm_plot_config, grid_layout = list(ncol = 3, nrow = 2))
|
plot_configs = rank_lm_plot_config, grid_layout = list(ncol = 3, nrow = 2))
|
||||||
|
|
||||||
# interaction_scores_filtered
|
# Formerly X_NArm
|
||||||
|
zscores_interactions_filtered <- zscores_interactions %>%
|
||||||
|
group_by(across(all_of(group_vars))) %>%
|
||||||
|
filter(!is.na(Z_lm_L) | !is.na(Avg_Zscore_L))
|
||||||
|
|
||||||
|
zscores_interactions_filtered <- zscores_interactions_filtered %>%
|
||||||
|
mutate(
|
||||||
|
Overlap = case_when(
|
||||||
|
Z_lm_L >= 2 & Avg_Zscore_L >= 2 ~ "Deletion Enhancer Both",
|
||||||
|
Z_lm_L <= -2 & Avg_Zscore_L <= -2 ~ "Deletion Suppressor Both",
|
||||||
|
Z_lm_L >= 2 & Avg_Zscore_L <= 2 ~ "Deletion Enhancer lm only",
|
||||||
|
Z_lm_L <= -2 & Avg_Zscore_L >= -2 ~ "Deletion Suppressor lm only",
|
||||||
|
Z_lm_L >= 2 & Avg_Zscore_L <= -2 ~ "Deletion Enhancer lm, Deletion Suppressor Avg Z score",
|
||||||
|
Z_lm_L <= -2 & Avg_Zscore_L >= 2 ~ "Deletion Suppressor lm, Deletion Enhancer Avg Z score",
|
||||||
|
TRUE ~ "No Effect"
|
||||||
|
),
|
||||||
|
lm_R_squared_L = summary(lm(Z_lm_L ~ Avg_Zscore_L))$r.squared,
|
||||||
|
lm_R_squared_K = summary(lm(Z_lm_K ~ Avg_Zscore_K))$r.squared,
|
||||||
|
lm_R_squared_r = summary(lm(Z_lm_r ~ Avg_Zscore_r))$r.squared,
|
||||||
|
lm_R_squared_AUC = summary(lm(Z_lm_AUC ~ Avg_Zscore_AUC))$r.squared
|
||||||
|
) %>%
|
||||||
|
ungroup()
|
||||||
|
|
||||||
|
rank_plot_configs <- c(
|
||||||
|
generate_rank_plot_configs(zscores_interactions_filtered, "Rank_L", "Avg_Zscore_L", "L"),
|
||||||
|
generate_rank_plot_configs(zscores_interactions_filtered, "Rank_K", "Avg_Zscore_K", "K")
|
||||||
|
)
|
||||||
|
|
||||||
|
generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots",
|
||||||
|
plot_configs = rank_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
|
||||||
|
|
||||||
|
rank_lm_plot_configs <- c(
|
||||||
|
generate_rank_plot_configs(zscores_interactions_filtered, "Rank_lm_L", "Z_lm_L", "L", is_lm = TRUE),
|
||||||
|
generate_rank_plot_configs(zscores_interactions_filtered, "Rank_lm_K", "Z_lm_K", "K", is_lm = TRUE)
|
||||||
|
)
|
||||||
|
|
||||||
|
generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots_lm",
|
||||||
|
plot_configs = rank_lm_plot_configs, grid_layout = list(ncol = 3, nrow = 2))
|
||||||
|
|
||||||
|
correlation_plot_configs <- generate_correlation_plot_configs(zscores_interactions_filtered, variables)
|
||||||
|
generate_and_save_plots(output_dir = out_dir, file_name = "Avg_Zscore_vs_lm_NA_rm",
|
||||||
|
plot_configs = correlation_plot_configs, grid_layout = list(ncol = 2, nrow = 2))
|
||||||
|
|
||||||
# lm_summaries <- lapply(lm_list, summary)
|
|
||||||
# correlation_plot_configs <- generate_correlation_plot_configs(zscores_interactions_filtered, lm_list, lm_summaries)
|
|
||||||
# generate_and_save_plots(zscores_interactions_filtered, output_dir, correlation_plot_configs)
|
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
main()
|
main()
|
||||||
|
|
||||||
|
|
||||||
# # Correlation plots
|
|
||||||
# lm_list <- list(
|
|
||||||
# lm(Z_lm_K ~ Z_lm_L, data = zscores_interactions_filtered),
|
|
||||||
# lm(Z_lm_r ~ Z_lm_L, data = zscores_interactions_filtered),
|
|
||||||
# lm(Z_lm_AUC ~ Z_lm_L, data = zscores_interactions_filtered),
|
|
||||||
# lm(Z_lm_r ~ Z_lm_K, data = zscores_interactions_filtered),
|
|
||||||
# lm(Z_lm_AUC ~ Z_lm_K, data = zscores_interactions_filtered),
|
|
||||||
# lm(Z_lm_AUC ~ Z_lm_r, data = zscores_interactions_filtered)
|
|
||||||
# )
|
|
||||||
Reference in New Issue
Block a user