Add rank plot configs

This commit is contained in:
2024-09-10 03:08:15 -04:00
parent a5c38ec47c
commit 3833f75184

View File

@@ -231,16 +231,17 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
ungroup() ungroup()
interaction_scores <- interaction_scores %>% interaction_scores <- interaction_scores %>%
mutate( group_by(across(all_of(group_vars))) %>%
Raw_Shift_L = mean_L[[1]] - bg_means$L, mutate(
Raw_Shift_K = mean_K[[1]] - bg_means$K, Raw_Shift_L = mean_L[[1]] - bg_means$L,
Raw_Shift_r = mean_r[[1]] - bg_means$r, Raw_Shift_K = mean_K[[1]] - bg_means$K,
Raw_Shift_AUC = mean_AUC[[1]] - bg_means$AUC, Raw_Shift_r = mean_r[[1]] - bg_means$r,
Z_Shift_L = Raw_Shift_L[[1]] / df$sd_L[[1]], Raw_Shift_AUC = mean_AUC[[1]] - bg_means$AUC,
Z_Shift_K = Raw_Shift_K[[1]] / df$sd_K[[1]], Z_Shift_L = Raw_Shift_L[[1]] / df$sd_L[[1]],
Z_Shift_r = Raw_Shift_r[[1]] / df$sd_r[[1]], Z_Shift_K = Raw_Shift_K[[1]] / df$sd_K[[1]],
Z_Shift_AUC = Raw_Shift_AUC[[1]] / df$sd_AUC[[1]] Z_Shift_r = Raw_Shift_r[[1]] / df$sd_r[[1]],
) Z_Shift_AUC = Raw_Shift_AUC[[1]] / df$sd_AUC[[1]]
)
interaction_scores <- interaction_scores %>% interaction_scores <- interaction_scores %>%
mutate( mutate(
@@ -263,28 +264,33 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L) Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L)
) )
# Calculate Z-scores for each variable # Calculate linear models and interaction scores
interaction_scores <- interaction_scores %>% interaction_scores <- interaction_scores %>%
mutate( mutate(
lm_L = lm(Delta_L ~ conc_num_factor),
lm_K = lm(Delta_K ~ conc_num_factor),
lm_r = lm(Delta_r ~ conc_num_factor),
lm_AUC = lm(Delta_AUC ~ conc_num_factor),
Zscore_L = Delta_L / WT_sd_L, Zscore_L = Delta_L / WT_sd_L,
Zscore_K = Delta_K / WT_sd_K, Zscore_K = Delta_K / WT_sd_K,
Zscore_r = Delta_r / WT_sd_r, Zscore_r = Delta_r / WT_sd_r,
Zscore_AUC = Delta_AUC / WT_sd_AUC Zscore_AUC = Delta_AUC / WT_sd_AUC
) )
# Calculate linear models
interaction_scores <- interaction_scores %>% interaction_scores <- interaction_scores %>%
mutate( mutate(
lm_L = lm(Delta_L ~ conc_num_factor), Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE),
lm_K = lm(Delta_K ~ conc_num_factor), Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE),
lm_r = lm(Delta_r ~ conc_num_factor), Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE),
lm_AUC = Dlm(Delta_AUC ~ conc_num_factor) Sum_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE)
) )
# Calculate linear models and interaction scores
interaction_scores_all <- interaction_scores %>% interaction_scores_all <- interaction_scores %>%
group_by(across(all_of(group_vars))) %>%
mutate( mutate(
Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs,
Avg_Zscore_K = Sum_Zscore_K / num_non_removed_concs,
Avg_Zscore_r = Sum_Zscore_r / (total_conc_num - 1),
Avg_Zscore_AUC = Sum_Zscore_AUC / (total_conc_num - 1),
lm_Score_L = max_conc * coef(lm_L)[2] + coef(lm_L)[1], lm_Score_L = max_conc * coef(lm_L)[2] + coef(lm_L)[1],
lm_Score_K = max_conc * coef(lm_K)[2] + coef(lm_K)[1], lm_Score_K = max_conc * coef(lm_K)[2] + coef(lm_K)[1],
lm_Score_r = max_conc * coef(lm_r)[2] + coef(lm_r)[1], lm_Score_r = max_conc * coef(lm_r)[2] + coef(lm_r)[1],
@@ -301,11 +307,7 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
Z_lm_L = (lm_Score_L - mean(lm_Score_L, na.rm = TRUE)) / sd(lm_Score_L, na.rm = TRUE), Z_lm_L = (lm_Score_L - mean(lm_Score_L, na.rm = TRUE)) / sd(lm_Score_L, na.rm = TRUE),
Z_lm_K = (lm_Score_K - mean(lm_Score_K, na.rm = TRUE)) / sd(lm_Score_K, na.rm = TRUE), Z_lm_K = (lm_Score_K - mean(lm_Score_K, na.rm = TRUE)) / sd(lm_Score_K, na.rm = TRUE),
Z_lm_r = (lm_Score_r - mean(lm_Score_r, na.rm = TRUE)) / sd(lm_Score_r, na.rm = TRUE), Z_lm_r = (lm_Score_r - mean(lm_Score_r, na.rm = TRUE)) / sd(lm_Score_r, na.rm = TRUE),
Z_lm_AUC = (lm_Score_AUC - mean(lm_Score_AUC, na.rm = TRUE)) / sd(lm_Score_AUC, na.rm = TRUE), Z_lm_AUC = (lm_Score_AUC - mean(lm_Score_AUC, na.rm = TRUE)) / sd(lm_Score_AUC, na.rm = TRUE)
Avg_Zscore_L = sum(Zscore_L, na.rm = TRUE) / num_non_removed_concs,
Avg_Zscore_K = sum(Zscore_K, na.rm = TRUE) / num_non_removed_concs,
Avg_Zscore_r = sum(Zscore_r, na.rm = TRUE) / (total_conc_num - 1),
Avg_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE) / (total_conc_num - 1)
) )
# Arrange results by Z_lm_L and NG # Arrange results by Z_lm_L and NG
@@ -541,7 +543,6 @@ adjust_missing_and_rank <- function(df, variables) {
return(df) return(df)
} }
main <- function() { main <- function() {
lapply(names(args$experiments), function(exp_name) { lapply(names(args$experiments), function(exp_name) {
exp <- args$experiments[[exp_name]] exp <- args$experiments[[exp_name]]
@@ -625,7 +626,8 @@ main <- function() {
list(df = df, x_var = "L", y_var = "K", plot_type = "scatter", list(df = df, x_var = "L", y_var = "K", plot_type = "scatter",
title = "Raw L vs K before QC", title = "Raw L vs K before QC",
color_var = "conc_num", color_var = "conc_num",
legend_position = "right") legend_position = "right"
)
) )
above_threshold_plots <- list( above_threshold_plots <- list(
@@ -678,15 +680,10 @@ main <- function() {
error_bar <- ifelse(plot_type == "scatter", TRUE, FALSE) error_bar <- ifelse(plot_type == "scatter", TRUE, FALSE)
# Create the plot configuration # Create the plot configuration
plot_config <- list( plot_config <- list(df = df_plot, x_var = "scan", y_var = var, plot_type = plot_type,
df = df_plot,
x_var = "scan",
y_var = var,
plot_type = plot_type,
title = paste("Plate analysis by Drug Conc for", var, stage, "quality control"), title = paste("Plate analysis by Drug Conc for", var, stage, "quality control"),
error_bar = error_bar, error_bar = error_bar, color_var = "conc_num")
color_var = "conc_num"
)
plate_analysis_plots <- append(plate_analysis_plots, list(plot_config)) plate_analysis_plots <- append(plate_analysis_plots, list(plot_config))
} }
} }
@@ -718,14 +715,16 @@ main <- function() {
list(df = X_outside_2SD_K, x_var = "l", y_var = "K", plot_type = "scatter", list(df = X_outside_2SD_K, x_var = "l", y_var = "K", plot_type = "scatter",
title = "Raw L vs K for strains falling outside 2SD of the K mean at each Conc", title = "Raw L vs K for strains falling outside 2SD of the K mean at each Conc",
color_var = "conc_num", color_var = "conc_num",
legend_position = "right") legend_position = "right"
)
) )
delta_bg_outside_2sd_k_plots <- list( delta_bg_outside_2sd_k_plots <- list(
list(df = X_outside_2SD_K, x_var = "delta_bg", y_var = "K", plot_type = "scatter", list(df = X_outside_2SD_K, x_var = "delta_bg", y_var = "K", plot_type = "scatter",
title = "Delta Background vs K for strains falling outside 2SD of the K mean at each Conc", title = "Delta Background vs K for strains falling outside 2SD of the K mean at each Conc",
color_var = "conc_num", color_var = "conc_num",
legend_position = "right") legend_position = "right"
)
) )
# Generate and save plots for each QC step # Generate and save plots for each QC step
@@ -744,7 +743,6 @@ main <- function() {
# TODO: Originally this filtered L NA's # TODO: Originally this filtered L NA's
# Let's try to avoid for now since stats have already been calculated # Let's try to avoid for now since stats have already been calculated
# Process background strains # Process background strains
bg_strains <- c("YDL227C") bg_strains <- c("YDL227C")
lapply(bg_strains, function(strain) { lapply(bg_strains, function(strain) {
@@ -847,10 +845,10 @@ main <- function() {
# Save enhancers and suppressors # Save enhancers and suppressors
message("Writing enhancer/suppressor csv files") message("Writing enhancer/suppressor csv files")
write.csv(enhancers_L, file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_L.csv"), row.names = FALSE) write.csv(enhancers_L, file = file.path(out_dir, "ZScores_Interaction_Deletion_Enhancers_L.csv"), row.names = FALSE)
write.csv(suppressors_L, file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_L.csv"), row.names = FALSE) write.csv(suppressors_L, file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_L.csv"), row.names = FALSE)
write.csv(enhancers_K, file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_K.csv"), row.names = FALSE) write.csv(enhancers_K, file = file.path(out_dir, "ZScores_Interaction_Deletion_Enhancers_K.csv"), row.names = FALSE)
write.csv(suppressors_K, file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_K.csv"), row.names = FALSE) write.csv(suppressors_K, file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K.csv"), row.names = FALSE)
# Combine conditions for enhancers and suppressors # Combine conditions for enhancers and suppressors
enhancers_and_suppressors_L <- zscores_interactions[enhancer_condition_L | suppressor_condition_L, ] enhancers_and_suppressors_L <- zscores_interactions[enhancer_condition_L | suppressor_condition_L, ]
@@ -858,9 +856,9 @@ main <- function() {
# Save combined enhancers and suppressors # Save combined enhancers and suppressors
write.csv(enhancers_and_suppressors_L, write.csv(enhancers_and_suppressors_L,
file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_L.csv"), row.names = FALSE) file = file.path(out_dir, "ZScores_Interaction_Deletion_Enhancers_and_Suppressors_L.csv"), row.names = FALSE)
write.csv(enhancers_and_suppressors_K, write.csv(enhancers_and_suppressors_K,
file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_and_Suppressors_K.csv"), row.names = FALSE) file = file.path(out_dir, "ZScores_Interaction_Deletion_Enhancers_and_Suppressors_K.csv"), row.names = FALSE)
# Handle linear model based enhancers and suppressors # Handle linear model based enhancers and suppressors
lm_threshold <- 2 lm_threshold <- 2
@@ -872,37 +870,94 @@ main <- function() {
# Save linear model based enhancers and suppressors # Save linear model based enhancers and suppressors
message("Writing linear model enhancer/suppressor csv files") message("Writing linear model enhancer/suppressor csv files")
write.csv(enhancers_lm_L, write.csv(enhancers_lm_L,
file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_L_lm.csv"), row.names = FALSE) file = file.path(out_dir, "ZScores_Interaction_Deletion_Enhancers_L_lm.csv"), row.names = FALSE)
write.csv(suppressors_lm_L, write.csv(suppressors_lm_L,
file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_L_lm.csv"), row.names = FALSE) file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_L_lm.csv"), row.names = FALSE)
write.csv(enhancers_lm_K, write.csv(enhancers_lm_K,
file = file.path(out_dir, "ZScores_Interaction_DeletionEnhancers_K_lm.csv"), row.names = FALSE) file = file.path(out_dir, "ZScores_Interaction_Deletion_Enhancers_K_lm.csv"), row.names = FALSE)
write.csv(suppressors_lm_K, write.csv(suppressors_lm_K,
file = file.path(out_dir, "ZScores_Interaction_DeletionSuppressors_K_lm.csv"), row.names = FALSE) file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K_lm.csv"), row.names = FALSE)
# Correlation plots zscores_interactions_adjusted <- adjust_missing_and_rank(zscores_interactions)
lm_list <- list(
lm(Z_lm_K ~ Z_lm_L, data = zscores_interactions_filtered), # Generate ranked plots
lm(Z_lm_r ~ Z_lm_L, data = zscores_interactions_filtered), rank_plot_config <- list(
lm(Z_lm_AUC ~ Z_lm_L, data = zscores_interactions_filtered), # L Rank plots with different SD thresholds
lm(Z_lm_r ~ Z_lm_K, data = zscores_interactions_filtered), list(df = zscores_interactions_adjusted, x_var = "L_Rank", y_var = "Avg_Zscore_L", plot_type = "rank",
lm(Z_lm_AUC ~ Z_lm_K, data = zscores_interactions_filtered), title = "Average Z score vs. Rank for L above 1SD", sd_band = 1,
lm(Z_lm_AUC ~ Z_lm_r, data = zscores_interactions_filtered) enhancer_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = 10,
label = paste("Deletion Enhancers =",
nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_L >= 1, ]))),
suppressor_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = -10,
label = paste("Deletion Suppressors =",
nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_L <= -1, ])))
),
list(df = zscores_interactions_adjusted, x_var = "L_Rank", y_var = "Avg_Zscore_L", plot_type = "rank",
title = "Average Z score vs. Rank for L above 2SD", sd_band = 2,
enhancer_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = 10,
label = paste("Deletion Enhancers =",
nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_L >= 2, ]))),
suppressor_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = -10,
label = paste("Deletion Suppressors =",
nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_L <= -2, ])))
),
list(df = zscores_interactions_adjusted, x_var = "L_Rank", y_var = "Avg_Zscore_L", plot_type = "rank",
title = "Average Z score vs. Rank for L above 3SD", sd_band = 3,
enhancer_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = 10,
label = paste("Deletion Enhancers =",
nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_L >= 3, ]))),
suppressor_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = -10,
label = paste("Deletion Suppressors =",
nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_L <= -3, ])))
),
# K Rank plots with different SD thresholds
list(df = zscores_interactions_adjusted, x_var = "K_Rank", y_var = "Avg_Zscore_K", plot_type = "rank",
title = "Average Z score vs. Rank for K above 1SD", sd_band = 1,
enhancer_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = 10,
label = paste("Deletion Enhancers =",
nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_K >= 1, ]))),
suppressor_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = -10,
label = paste("Deletion Suppressors =",
nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_K <= -1, ])))
),
list(df = zscores_interactions_adjusted, x_var = "K_Rank", y_var = "Avg_Zscore_K", plot_type = "rank",
title = "Average Z score vs. Rank for K above 2SD", sd_band = 2,
enhancer_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = 10,
label = paste("Deletion Enhancers =",
nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_K >= 2, ]))),
suppressor_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = -10,
label = paste("Deletion Suppressors =",
nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_K <= -2, ])))
),
list(df = zscores_interactions_adjusted, x_var = "K_Rank", y_var = "Avg_Zscore_K", plot_type = "rank",
title = "Average Z score vs. Rank for K above 3SD", sd_band = 3,
enhancer_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = 10,
label = paste("Deletion Enhancers =",
nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_K >= 3, ]))),
suppressor_label = list(x = nrow(zscores_interactions_adjusted) / 2, y = -10,
label = paste("Deletion Suppressors =",
nrow(zscores_interactions_adjusted[zscores_interactions_adjusted$Avg_Zscore_K <= -3, ])))
)
) )
# Generate and save rank plots
generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots",
plot_configs = rank_plot_config, grid_layout = list(ncol = 3, nrow = 2))
# # Correlation plots
# lm_list <- list(
# lm(Z_lm_K ~ Z_lm_L, data = zscores_interactions_filtered),
# lm(Z_lm_r ~ Z_lm_L, data = zscores_interactions_filtered),
# lm(Z_lm_AUC ~ Z_lm_L, data = zscores_interactions_filtered),
# lm(Z_lm_r ~ Z_lm_K, data = zscores_interactions_filtered),
# lm(Z_lm_AUC ~ Z_lm_K, data = zscores_interactions_filtered),
# lm(Z_lm_AUC ~ Z_lm_r, data = zscores_interactions_filtered)
# )
lm_summaries <- lapply(lm_list, summary) lm_summaries <- lapply(lm_list, summary)
correlation_plot_configs <- correlation_plot_configs(zscores_interactions_filtered, lm_list, lm_summaries) correlation_plot_configs <- correlation_plot_configs(zscores_interactions_filtered, lm_list, lm_summaries)
generate_and_save_plots(zscores_interactions_filtered, output_dir, correlation_plot_configs) generate_and_save_plots(zscores_interactions_filtered, output_dir, correlation_plot_configs)
# Generate ranked plots
rank_plot_config <- list(
list(x_var = "L_Rank", y_var = "Avg_Zscore_L", plot_type = "scatter", title = "Rank vs Avg Z score for L"),
list(x_var = "K_Rank", y_var = "Avg_Zscore_K", plot_type = "scatter", title = "Rank vs Avg Z score for K"),
list(x_var = "r_Rank", y_var = "Avg_Zscore_r", plot_type = "scatter", title = "Rank vs Avg Z score for r"),
list(x_var = "AUC_Rank", y_var = "Avg_Zscore_AUC", plot_type = "scatter", title = "Rank vs Avg Z score for AUC")
)
# Generate and save rank plots using the existing plotting framework
generate_and_save_plots(zscores_interactions_filtered, output_dir, rank_plot_config)
}) })
}) })
} }