|
@@ -160,6 +160,11 @@ load_and_filter_data <- function(easy_results_file, sd = 3) {
|
|
|
conc_num_factor_factor = as.factor(conc_num)
|
|
|
)
|
|
|
|
|
|
+ # Set the max concentration across the whole dataframe
|
|
|
+ max_conc <- max(df$conc_num_factor, na.rm = TRUE)
|
|
|
+ df <- df %>%
|
|
|
+ mutate(max_conc = max_conc)
|
|
|
+
|
|
|
return(df)
|
|
|
}
|
|
|
|
|
@@ -211,7 +216,7 @@ calculate_summary_stats <- function(df, variables, group_vars) {
|
|
|
return(list(summary_stats = summary_stats, df_with_stats = df_joined))
|
|
|
}
|
|
|
|
|
|
-calculate_interaction_scores <- function(df, max_conc, bg_stats, group_vars, overlap_threshold = 2) {
|
|
|
+calculate_interaction_scores <- function(df, bg_stats, group_vars, overlap_threshold = 2) {
|
|
|
|
|
|
# Calculate total concentration variables
|
|
|
total_conc_num <- length(unique(df$conc_num))
|
|
@@ -294,16 +299,12 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats, group_vars, ove
|
|
|
lm_means_sds <- calculations %>%
|
|
|
group_by(across(all_of(group_vars))) %>%
|
|
|
summarise(
|
|
|
- mean_mean_L = mean(mean_L, na.rm = TRUE),
|
|
|
mean_lm_L = mean(lm_Score_L, na.rm = TRUE),
|
|
|
sd_lm_L = sd(lm_Score_L, na.rm = TRUE),
|
|
|
- mean_mean_K = mean(mean_K, na.rm = TRUE),
|
|
|
mean_lm_K = mean(lm_Score_K, na.rm = TRUE),
|
|
|
sd_lm_K = sd(lm_Score_K, na.rm = TRUE),
|
|
|
- mean_mean_r = mean(mean_r, na.rm = TRUE),
|
|
|
mean_lm_r = mean(lm_Score_r, na.rm = TRUE),
|
|
|
sd_lm_r = sd(lm_Score_r, na.rm = TRUE),
|
|
|
- mean_mean_AUC = mean(mean_AUC, na.rm = TRUE),
|
|
|
mean_lm_AUC = mean(lm_Score_AUC, na.rm = TRUE),
|
|
|
sd_lm_AUC = sd(lm_Score_AUC, na.rm = TRUE)
|
|
|
)
|
|
@@ -469,7 +470,12 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs) {
|
|
|
"red"
|
|
|
}
|
|
|
|
|
|
- y_mean_col <- paste0("mean_mean_", config$y_var)
|
|
|
+ y_mean_prefix <- if (!is.null(config$error_bar_params$y_mean_prefix)) {
|
|
|
+ config$error_bar_params$y_mean_prefix
|
|
|
+ } else {
|
|
|
+ "mean_"
|
|
|
+ }
|
|
|
+ y_mean_col <- paste0(y_mean_prefix, config$y_var)
|
|
|
|
|
|
# Dynamically set y_sd_col based on the provided prefix in error_bar_params
|
|
|
y_sd_prefix <- if (!is.null(config$error_bar_params$y_sd_prefix)) {
|
|
@@ -482,7 +488,7 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs) {
|
|
|
if (!is.null(config$error_bar_params$center_point)) {
|
|
|
plot <- plot + geom_point(aes(
|
|
|
x = .data[[config$x_var]],
|
|
|
- y = .data[[y_mean_col]]),
|
|
|
+ y = first(.data[[y_mean_col]])),
|
|
|
color = error_bar_color,
|
|
|
shape = 16)
|
|
|
}
|
|
@@ -495,14 +501,12 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs) {
|
|
|
color = error_bar_color)
|
|
|
} else {
|
|
|
plot <- plot + geom_errorbar(aes(
|
|
|
- ymin = .data[[y_mean_col]] - .data[[y_sd_col]],
|
|
|
- ymax = .data[[y_mean_col]] + .data[[y_sd_col]]),
|
|
|
+ ymin = first(.data[[y_mean_col]]) - first(.data[[y_sd_col]]),
|
|
|
+ ymax = first(.data[[y_mean_col]]) + first(.data[[y_sd_col]])),
|
|
|
color = error_bar_color)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-
|
|
|
-
|
|
|
# Convert ggplot to plotly for interactive version
|
|
|
plotly_plot <- suppressWarnings(plotly::ggplotly(plot))
|
|
|
|
|
@@ -546,7 +550,7 @@ generate_scatter_plot <- function(plot, config) {
|
|
|
size <- if (!is.null(config$size)) config$size else 1.5
|
|
|
position <-
|
|
|
if (!is.null(config$position) && config$position == "jitter") {
|
|
|
- position_jitter(width = 0.3, height = 0.1)
|
|
|
+ position_jitter(width = 0.4, height = 0.1)
|
|
|
} else {
|
|
|
"identity"
|
|
|
}
|
|
@@ -752,7 +756,7 @@ generate_interaction_plot_configs <- function(df, type) {
|
|
|
|
|
|
for (var in names(limits_map)) {
|
|
|
y_limits <- limits_map[[var]]
|
|
|
- y_span <- y_limits[2] - y_limits[1]
|
|
|
+ y_span <- y_limits[2] - y_limits[1]
|
|
|
|
|
|
# Common plot configuration
|
|
|
plot_config <- list(
|
|
@@ -773,15 +777,16 @@ generate_interaction_plot_configs <- function(df, type) {
|
|
|
plot_config$error_bar <- TRUE
|
|
|
plot_config$error_bar_params <- list(
|
|
|
y_sd_prefix = "WT_sd_",
|
|
|
+ y_mean_prefix = "mean_",
|
|
|
color = "red",
|
|
|
center_point = TRUE
|
|
|
)
|
|
|
plot_config$position <- "jitter"
|
|
|
|
|
|
annotations <- list(
|
|
|
- list(x = -0.25, y = y_limits[1] + 0.1 * y_span, label = "NG ="), # Slightly above y-min
|
|
|
- list(x = -0.25, y = y_limits[1] + 0.05 * y_span, label = "DB ="),
|
|
|
- list(x = -0.25, y = y_limits[1], label = "SM =")
|
|
|
+ list(x = 0.25, y = y_limits[1] + 0.1 * y_span, label = "NG ="), # Slightly above y-min
|
|
|
+ list(x = 0.25, y = y_limits[1] + 0.05 * y_span, label = "DB ="),
|
|
|
+ list(x = 0.25, y = y_limits[1], label = "SM =")
|
|
|
)
|
|
|
|
|
|
# Loop over unique x values and add NG, DB, SM values at calculated y positions
|
|
@@ -1067,70 +1072,35 @@ main <- function() {
|
|
|
out_dir_qc <- file.path(exp_path, "zscores", "qc")
|
|
|
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
|
|
|
dir.create(out_dir_qc, recursive = TRUE, showWarnings = FALSE)
|
|
|
-
|
|
|
- summary_vars <- c("L", "K", "r", "AUC", "delta_bg") # fields to filter and calculate summary stats across
|
|
|
- interaction_vars <- c("L", "K", "r", "AUC") # fields to calculate interaction z-scores
|
|
|
|
|
|
message("Loading and filtering data for experiment: ", exp_name)
|
|
|
df <- load_and_filter_data(args$easy_results_file, sd = exp_sd) %>%
|
|
|
update_gene_names(args$sgd_gene_list) %>%
|
|
|
as_tibble()
|
|
|
|
|
|
- # Filter rows above delta background tolerance
|
|
|
- df_above_tolerance <- df %>% filter(DB == 1)
|
|
|
- df_na <- df %>% mutate(across(all_of(summary_vars), ~ ifelse(DB == 1, NA, .))) # formerly X
|
|
|
- df_no_zeros <- df_na %>% filter(L > 0) # formerly X_noZero
|
|
|
-
|
|
|
- # Save some constants
|
|
|
- max_conc <- max(df$conc_num_factor)
|
|
|
-
|
|
|
message("Calculating summary statistics before quality control")
|
|
|
df_stats <- calculate_summary_stats(
|
|
|
df = df,
|
|
|
- variables = summary_vars,
|
|
|
+ variables = c("L", "K", "r", "AUC", "delta_bg"),
|
|
|
group_vars = c("conc_num", "conc_num_factor", "conc_num_factor_factor"))$df_with_stats
|
|
|
-
|
|
|
+
|
|
|
message("Calculating summary statistics after quality control")
|
|
|
+ df_na <- df %>% mutate(across(all_of(c("L", "K", "r", "AUC", "delta_bg")), ~ ifelse(DB == 1, NA, .))) # formerly X
|
|
|
ss <- calculate_summary_stats(
|
|
|
df = df_na,
|
|
|
- variables = summary_vars,
|
|
|
+ variables = c("L", "K", "r", "AUC", "delta_bg"),
|
|
|
group_vars = c("conc_num", "conc_num_factor", "conc_num_factor_factor"))
|
|
|
df_na_ss <- ss$summary_stats
|
|
|
- df_na_stats <- ss$df_with_stats
|
|
|
+ df_na_stats <- ss$df_with_stats # formerly X_stats_ALL
|
|
|
write.csv(df_na_ss, file = file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
|
|
|
# For plotting (ggplot warns on NAs)
|
|
|
- df_na_stats_filtered <- df_na_stats %>% filter(if_all(all_of(summary_vars), is.finite))
|
|
|
-
|
|
|
- df_na_stats <- df_na_stats %>%
|
|
|
- mutate(
|
|
|
- WT_L = mean_L,
|
|
|
- WT_K = mean_K,
|
|
|
- WT_r = mean_r,
|
|
|
- WT_AUC = mean_AUC,
|
|
|
- WT_sd_L = sd_L,
|
|
|
- WT_sd_K = sd_K,
|
|
|
- WT_sd_r = sd_r,
|
|
|
- WT_sd_AUC = sd_AUC
|
|
|
- )
|
|
|
-
|
|
|
- # Pull the background means and standard deviations from zero concentration for interactions
|
|
|
- bg_stats <- df_na_stats %>%
|
|
|
- filter(conc_num == 0) %>%
|
|
|
- summarise(
|
|
|
- mean_L = first(mean_L),
|
|
|
- mean_K = first(mean_K),
|
|
|
- mean_r = first(mean_r),
|
|
|
- mean_AUC = first(mean_AUC),
|
|
|
- sd_L = first(sd_L),
|
|
|
- sd_K = first(sd_K),
|
|
|
- sd_r = first(sd_r),
|
|
|
- sd_AUC = first(sd_AUC)
|
|
|
- )
|
|
|
+ df_na_stats_filtered <- df_na_stats %>% filter(if_all(all_of(c("L", "K", "r", "AUC", "delta_bg")), is.finite))
|
|
|
|
|
|
message("Calculating summary statistics after quality control excluding zero values")
|
|
|
+ df_no_zeros <- df_na %>% filter(L > 0) # formerly X_noZero
|
|
|
df_no_zeros_stats <- calculate_summary_stats(
|
|
|
df = df_no_zeros,
|
|
|
- variables = summary_vars,
|
|
|
+ variables = c("L", "K", "r", "AUC", "delta_bg"),
|
|
|
group_vars = c("conc_num", "conc_num_factor", "conc_num_factor_factor")
|
|
|
)$df_with_stats
|
|
|
|
|
@@ -1142,7 +1112,8 @@ main <- function() {
|
|
|
|
|
|
message("Calculating summary statistics for L within 2SD of K")
|
|
|
# TODO We're omitting the original z_max calculation, not sure if needed?
|
|
|
- ss <- calculate_summary_stats(df_na_within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor", "conc_num_factor_factor"))$summary_stats
|
|
|
+ ss <- calculate_summary_stats(df_na_within_2sd_k, "L",
|
|
|
+ group_vars = c("conc_num", "conc_num_factor", "conc_num_factor_factor"))$summary_stats
|
|
|
write.csv(ss,
|
|
|
file = file.path(out_dir_qc, "max_observed_L_vals_for_spots_within_2sd_K.csv"),
|
|
|
row.names = FALSE)
|
|
@@ -1153,7 +1124,7 @@ main <- function() {
|
|
|
write.csv(ss$summary_stats,
|
|
|
file = file.path(out_dir, "max_observed_L_vals_for_spots_outside_2sd_K.csv"),
|
|
|
row.names = FALSE)
|
|
|
-
|
|
|
+
|
|
|
# Each list of plots corresponds to a file
|
|
|
l_vs_k_plot_configs <- list(
|
|
|
plots = list(
|
|
@@ -1200,6 +1171,7 @@ main <- function() {
|
|
|
)
|
|
|
)
|
|
|
|
|
|
+ df_above_tolerance <- df %>% filter(DB == 1)
|
|
|
above_threshold_plot_configs <- list(
|
|
|
plots = list(
|
|
|
list(
|
|
@@ -1226,26 +1198,26 @@ main <- function() {
|
|
|
)
|
|
|
|
|
|
plate_analysis_plot_configs <- generate_plate_analysis_plot_configs(
|
|
|
- variables = summary_vars,
|
|
|
+ variables = c("L", "K", "r", "AUC", "delta_bg"),
|
|
|
df_before = df_stats,
|
|
|
df_after = df_na_stats_filtered
|
|
|
)
|
|
|
|
|
|
plate_analysis_boxplot_configs <- generate_plate_analysis_plot_configs(
|
|
|
- variables = summary_vars,
|
|
|
+ variables = c("L", "K", "r", "AUC", "delta_bg"),
|
|
|
df_before = df_stats,
|
|
|
df_after = df_na_stats_filtered,
|
|
|
plot_type = "box"
|
|
|
)
|
|
|
|
|
|
plate_analysis_no_zeros_plot_configs <- generate_plate_analysis_plot_configs(
|
|
|
- variables = summary_vars,
|
|
|
+ variables = c("L", "K", "r", "AUC", "delta_bg"),
|
|
|
stages = c("after"), # Only after QC
|
|
|
df_after = df_no_zeros_stats
|
|
|
)
|
|
|
|
|
|
plate_analysis_no_zeros_boxplot_configs <- generate_plate_analysis_plot_configs(
|
|
|
- variables = summary_vars,
|
|
|
+ variables = c("L", "K", "r", "AUC", "delta_bg"),
|
|
|
stages = c("after"), # Only after QC
|
|
|
df_after = df_no_zeros_stats,
|
|
|
plot_type = "box"
|
|
@@ -1329,7 +1301,6 @@ main <- function() {
|
|
|
# generate_and_save_plots(config$out_dir, config$filename, config$plot_configs)
|
|
|
# }, .options = furrr_options(seed = TRUE))
|
|
|
|
|
|
- # Process background strains
|
|
|
bg_strains <- c("YDL227C")
|
|
|
lapply(bg_strains, function(strain) {
|
|
|
|
|
@@ -1349,15 +1320,17 @@ main <- function() {
|
|
|
|
|
|
# Recalculate summary statistics for the background strain
|
|
|
message("Calculating summary statistics for background strain")
|
|
|
- ss_bg <- calculate_summary_stats(df_bg, summary_vars, group_vars = c("OrfRep", "conc_num", "conc_num_factor", "conc_num_factor_factor"))
|
|
|
+ ss_bg <- calculate_summary_stats(df_bg, c("L", "K", "r", "AUC", "delta_bg"),
|
|
|
+ group_vars = c("OrfRep", "conc_num", "conc_num_factor", "conc_num_factor_factor"))
|
|
|
summary_stats_bg <- ss_bg$summary_stats
|
|
|
+ ss_bg_stats <- ss_bg$df_with_stats
|
|
|
write.csv(summary_stats_bg,
|
|
|
file = file.path(out_dir, paste0("summary_stats_background_strain_", strain, ".csv")),
|
|
|
row.names = FALSE)
|
|
|
|
|
|
# Set the missing values to the highest theoretical value at each drug conc for L
|
|
|
# Leave other values as 0 for the max/min
|
|
|
- df_reference <- df_na_stats %>% # formerly X2_RF
|
|
|
+ df_reference <- df_bg_stats %>% # formerly X2_RF
|
|
|
filter(OrfRep == strain) %>%
|
|
|
filter(!is.na(L)) %>%
|
|
|
group_by(conc_num, conc_num_factor, conc_num_factor_factor) %>%
|
|
@@ -1384,10 +1357,10 @@ main <- function() {
|
|
|
message("Calculating reference strain interaction scores")
|
|
|
df_reference_stats <- calculate_summary_stats(
|
|
|
df = df_reference,
|
|
|
- variables = interaction_vars,
|
|
|
+ variables = c("L", "K", "r", "AUC"),
|
|
|
group_vars = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor")
|
|
|
)$df_with_stats
|
|
|
- reference_results <- calculate_interaction_scores(df_reference_stats, max_conc, bg_stats, group_vars = c("OrfRep", "Gene", "num"))
|
|
|
+ reference_results <- calculate_interaction_scores(df_reference_stats, bg_stats, group_vars = c("OrfRep", "Gene", "num"))
|
|
|
zscore_calculations_reference <- reference_results$calculations
|
|
|
zscore_interactions_reference <- reference_results$interactions
|
|
|
zscore_interactions_reference_joined <- reference_results$full_data
|
|
@@ -1395,10 +1368,10 @@ main <- function() {
|
|
|
message("Calculating deletion strain(s) interactions scores")
|
|
|
df_deletion_stats <- calculate_summary_stats(
|
|
|
df = df_deletion,
|
|
|
- variables = interaction_vars,
|
|
|
+ variables = c("L", "K", "r", "AUC"),
|
|
|
group_vars = c("OrfRep", "Gene", "conc_num", "conc_num_factor")
|
|
|
)$df_with_stats
|
|
|
- deletion_results <- calculate_interaction_scores(df_deletion_stats, max_conc, bg_stats, group_vars = c("OrfRep", "Gene"))
|
|
|
+ deletion_results <- calculate_interaction_scores(df_deletion_stats, bg_stats, group_vars = c("OrfRep", "Gene"))
|
|
|
zscore_calculations <- deletion_results$calculations
|
|
|
zscore_interactions <- deletion_results$interactions
|
|
|
zscore_interactions_joined <- deletion_results$full_data
|