Fix interaction groupings

This commit is contained in:
2024-10-03 20:58:28 -04:00
parent 4b273db78d
commit b23c6dafef

View File

@@ -163,22 +163,18 @@ load_and_filter_data <- function(easy_results_file, sd = 3) {
return(df) return(df)
} }
# Update Gene names using the SGD gene list
update_gene_names <- function(df, sgd_gene_list) { update_gene_names <- function(df, sgd_gene_list) {
# Load SGD gene list genes <- read.delim(file = sgd_gene_list, quote = "", header = FALSE,
genes <- read.delim(file = sgd_gene_list,
quote = "", header = FALSE,
colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11))) colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11)))
# Create a named vector for mapping ORF to GeneName gene_map <- setNames(genes$V5, genes$V4) # ORF to GeneName mapping
gene_map <- setNames(genes$V5, genes$V4)
# Vectorized match to find the GeneName from gene_map
mapped_genes <- gene_map[df$ORF]
# Replace NAs in mapped_genes with original Gene names (preserves existing Gene names if ORF is not found)
updated_genes <- ifelse(is.na(mapped_genes) | df$OrfRep == "YDL227C", df$Gene, mapped_genes)
# Ensure Gene is not left blank or incorrectly updated to "OCT1"
df <- df %>% df <- df %>%
mutate(Gene = ifelse(updated_genes == "" | updated_genes == "OCT1", OrfRep, updated_genes)) mutate(
mapped_genes = gene_map[ORF],
Gene = if_else(is.na(mapped_genes) | OrfRep == "YDL227C", Gene, mapped_genes),
Gene = if_else(Gene == "" | Gene == "OCT1", OrfRep, Gene) # Handle invalid names
)
return(df) return(df)
} }
@@ -203,61 +199,82 @@ calculate_summary_stats <- function(df, variables, group_vars) {
) )
# Create a cleaned version of df that doesn't overlap with summary_stats # Create a cleaned version of df that doesn't overlap with summary_stats
cleaned_df <- df %>% df_cleaned <- df %>%
select(-any_of(setdiff(intersect(names(df), names(summary_stats)), group_vars))) select(-any_of(setdiff(intersect(names(df), names(summary_stats)), group_vars)))
df_joined <- left_join(cleaned_df, summary_stats, by = group_vars) df_joined <- left_join(df_cleaned, summary_stats, by = group_vars)
return(list(summary_stats = summary_stats, df_with_stats = df_joined)) return(list(summary_stats = summary_stats, df_with_stats = df_joined))
} }
calculate_interaction_scores <- function(df, df_bg, group_vars) { calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshold = 2) {
max_conc <- max(as.numeric(df$conc_num_factor), na.rm = TRUE) max_conc <- max(as.numeric(df$conc_num_factor), na.rm = TRUE)
total_conc_num <- length(unique(df$conc_num))
# Include background statistics per concentration # Calculate WT statistics from df_bg
bg_stats <- df_bg %>% wt_stats <- df_bg %>%
group_by(conc_num, conc_num_factor) %>% filter(conc_num == 0) %>% # use the zero drug concentration background
summarise( summarise(
WT_L = first(mean_L), WT_L = mean(mean_L, na.rm = TRUE),
WT_K = first(mean_K), WT_sd_L = mean(sd_L, na.rm = TRUE),
WT_r = first(mean_r), WT_K = mean(mean_K, na.rm = TRUE),
WT_AUC = first(mean_AUC), WT_sd_K = mean(sd_K, na.rm = TRUE),
WT_sd_L = first(sd_L), WT_r = mean(mean_r, na.rm = TRUE),
WT_sd_K = first(sd_K), WT_sd_r = mean(sd_r, na.rm = TRUE),
WT_sd_r = first(sd_r), WT_AUC = mean(mean_AUC, na.rm = TRUE),
WT_sd_AUC = first(sd_AUC), WT_sd_AUC = mean(sd_AUC, na.rm = TRUE)
)
# Add WT statistics to df
df <- df %>%
mutate(
WT_L = wt_stats$WT_L,
WT_sd_L = wt_stats$WT_sd_L,
WT_K = wt_stats$WT_K,
WT_sd_K = wt_stats$WT_sd_K,
WT_r = wt_stats$WT_r,
WT_sd_r = wt_stats$WT_sd_r,
WT_AUC = wt_stats$WT_AUC,
WT_sd_AUC = wt_stats$WT_sd_AUC
)
# Compute mean values at zero concentration
mean_L_zero_df <- df %>%
filter(conc_num == 0) %>%
group_by(across(all_of(group_vars))) %>%
summarise(
mean_L_zero = mean(mean_L, na.rm = TRUE),
mean_K_zero = mean(mean_K, na.rm = TRUE),
mean_r_zero = mean(mean_r, na.rm = TRUE),
mean_AUC_zero = mean(mean_AUC, na.rm = TRUE),
.groups = "drop" .groups = "drop"
) )
# Calculate total number of concentrations # Join mean_L_zero_df to df
total_conc_num <- length(unique(df$conc_num)) df <- df %>%
left_join(mean_L_zero_df, by = group_vars)
# Join background statistics to df # Calculate Raw Shifts and Z Shifts
calculations <- df %>% df <- df %>%
left_join(bg_stats, by = c("conc_num", "conc_num_factor"))
# Perform calculations
calculations <- calculations %>%
group_by(across(all_of(group_vars))) %>%
mutate( mutate(
N = n(), Raw_Shift_L = mean_L_zero - WT_L,
NG = sum(NG, na.rm = TRUE), Raw_Shift_K = mean_K_zero - WT_K,
DB = sum(DB, na.rm = TRUE), Raw_Shift_r = mean_r_zero - WT_r,
SM = sum(SM, na.rm = TRUE), Raw_Shift_AUC = mean_AUC_zero - WT_AUC,
num_non_removed_concs = n_distinct(conc_num[DB != 1]),
# Raw shifts
Raw_Shift_L = mean_L - WT_L,
Raw_Shift_K = mean_K - WT_K,
Raw_Shift_r = mean_r - WT_r,
Raw_Shift_AUC = mean_AUC - WT_AUC,
# Z shifts
Z_Shift_L = Raw_Shift_L / WT_sd_L, Z_Shift_L = Raw_Shift_L / WT_sd_L,
Z_Shift_K = Raw_Shift_K / WT_sd_K, Z_Shift_K = Raw_Shift_K / WT_sd_K,
Z_Shift_r = Raw_Shift_r / WT_sd_r, Z_Shift_r = Raw_Shift_r / WT_sd_r,
Z_Shift_AUC = Raw_Shift_AUC / WT_sd_AUC, Z_Shift_AUC = Raw_Shift_AUC / WT_sd_AUC
)
calculations <- df %>%
group_by(across(all_of(group_vars))) %>%
mutate(
NG_sum = sum(NG, na.rm = TRUE),
DB_sum = sum(DB, na.rm = TRUE),
SM_sum = sum(SM, na.rm = TRUE),
num_non_removed_concs = total_conc_num - sum(DB, na.rm = TRUE) - 1,
# Expected values # Expected values
Exp_L = WT_L + Raw_Shift_L, Exp_L = WT_L + Raw_Shift_L,
@@ -271,38 +288,32 @@ calculate_interaction_scores <- function(df, df_bg, group_vars) {
Delta_r = mean_r - Exp_r, Delta_r = mean_r - Exp_r,
Delta_AUC = mean_AUC - Exp_AUC, Delta_AUC = mean_AUC - Exp_AUC,
# Adjust Deltas for NG and SM # Adjust deltas for NG and SM
Delta_L = if_else(NG == 1, mean_L - WT_L, Delta_L), Delta_L = if_else(NG == 1, mean_L - WT_L, Delta_L),
Delta_K = if_else(NG == 1, mean_K - WT_K, Delta_K), Delta_K = if_else(NG == 1, mean_K - WT_K, Delta_K),
Delta_r = if_else(NG == 1, mean_r - WT_r, Delta_r), Delta_r = if_else(NG == 1, mean_r - WT_r, Delta_r),
Delta_AUC = if_else(NG == 1, mean_AUC - WT_AUC, Delta_AUC), Delta_AUC = if_else(NG == 1, mean_AUC - WT_AUC, Delta_AUC),
Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L), Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L),
# Z-scores # Calculate Z-scores
Zscore_L = Delta_L / WT_sd_L, Zscore_L = Delta_L / WT_sd_L,
Zscore_K = Delta_K / WT_sd_K, Zscore_K = Delta_K / WT_sd_K,
Zscore_r = Delta_r / WT_sd_r, Zscore_r = Delta_r / WT_sd_r,
Zscore_AUC = Delta_AUC / WT_sd_AUC Zscore_AUC = Delta_AUC / WT_sd_AUC
) %>% ) %>%
ungroup()
# Fit linear models within each group
calculations <- calculations %>%
group_by(across(all_of(group_vars))) %>%
group_modify(~ { group_modify(~ {
data <- .x # Perform linear models
# Fit linear models lm_L <- lm(Delta_L ~ conc_num_factor, data = .x)
lm_L <- lm(Delta_L ~ conc_num_factor, data = data) lm_K <- lm(Delta_K ~ conc_num_factor, data = .x)
lm_K <- lm(Delta_K ~ conc_num_factor, data = data) lm_r <- lm(Delta_r ~ conc_num_factor, data = .x)
lm_r <- lm(Delta_r ~ conc_num_factor, data = data) lm_AUC <- lm(Delta_AUC ~ conc_num_factor, data = .x)
lm_AUC <- lm(Delta_AUC ~ conc_num_factor, data = data)
data <- data %>% .x %>%
mutate( mutate(
lm_intercept_L = coef(lm_L)[1], lm_intercept_L = coef(lm_L)[1],
lm_slope_L = coef(lm_L)[2], lm_slope_L = coef(lm_L)[2],
R_Squared_L = summary(lm_L)$r.squared, R_Squared_L = summary(lm_L)$r.squared,
lm_Score_L = max_conc * lm_slope_L + lm_intercept_L, lm_Score_L = max_conc * lm_slope_L + lm_intercept_L,
# Repeat for K, r, and AUC
lm_intercept_K = coef(lm_K)[1], lm_intercept_K = coef(lm_K)[1],
lm_slope_K = coef(lm_K)[2], lm_slope_K = coef(lm_K)[2],
R_Squared_K = summary(lm_K)$r.squared, R_Squared_K = summary(lm_K)$r.squared,
@@ -316,11 +327,10 @@ calculate_interaction_scores <- function(df, df_bg, group_vars) {
R_Squared_AUC = summary(lm_AUC)$r.squared, R_Squared_AUC = summary(lm_AUC)$r.squared,
lm_Score_AUC = max_conc * lm_slope_AUC + lm_intercept_AUC lm_Score_AUC = max_conc * lm_slope_AUC + lm_intercept_AUC
) )
return(data)
}) %>% }) %>%
ungroup() ungroup()
# Compute lm means and sds across all data without grouping # Summary statistics for lm scores
lm_means_sds <- calculations %>% lm_means_sds <- calculations %>%
summarise( summarise(
lm_mean_L = mean(lm_Score_L, na.rm = TRUE), lm_mean_L = mean(lm_Score_L, na.rm = TRUE),
@@ -330,26 +340,39 @@ calculate_interaction_scores <- function(df, df_bg, group_vars) {
lm_mean_r = mean(lm_Score_r, na.rm = TRUE), lm_mean_r = mean(lm_Score_r, na.rm = TRUE),
lm_sd_r = sd(lm_Score_r, na.rm = TRUE), lm_sd_r = sd(lm_Score_r, na.rm = TRUE),
lm_mean_AUC = mean(lm_Score_AUC, na.rm = TRUE), lm_mean_AUC = mean(lm_Score_AUC, na.rm = TRUE),
lm_sd_AUC = sd(lm_Score_AUC, na.rm = TRUE) lm_sd_AUC = sd(lm_Score_AUC, na.rm = TRUE),
.groups = "drop"
) )
# Apply global lm means and sds to calculate Z_lm_*
calculations <- calculations %>% calculations <- calculations %>%
mutate( mutate(
Z_lm_L = (lm_Score_L - lm_means_sds$lm_mean_L) / lm_means_sds$lm_sd_L, lm_mean_L = lm_means_sds$lm_mean_L,
Z_lm_K = (lm_Score_K - lm_means_sds$lm_mean_K) / lm_means_sds$lm_sd_K, lm_sd_L = lm_means_sds$lm_sd_L,
Z_lm_r = (lm_Score_r - lm_means_sds$lm_mean_r) / lm_means_sds$lm_sd_r, lm_mean_K = lm_means_sds$lm_mean_K,
Z_lm_AUC = (lm_Score_AUC - lm_means_sds$lm_mean_AUC) / lm_means_sds$lm_sd_AUC lm_sd_K = lm_means_sds$lm_sd_K,
lm_mean_r = lm_means_sds$lm_mean_r,
lm_sd_r = lm_means_sds$lm_sd_r,
lm_mean_AUC = lm_means_sds$lm_mean_AUC,
lm_sd_AUC = lm_means_sds$lm_sd_AUC
) )
# Build interactions data frame # Continue with gene Z-scores and interactions
calculations <- calculations %>%
mutate(
Z_lm_L = (lm_Score_L - lm_mean_L) / lm_sd_L,
Z_lm_K = (lm_Score_K - lm_mean_K) / lm_sd_K,
Z_lm_r = (lm_Score_r - lm_mean_r) / lm_sd_r,
Z_lm_AUC = (lm_Score_AUC - lm_mean_AUC) / lm_sd_AUC
)
# Build summary stats (interactions)
interactions <- calculations %>% interactions <- calculations %>%
group_by(across(all_of(group_vars))) %>% group_by(across(all_of(group_vars))) %>%
summarise( summarise(
Avg_Zscore_L = mean(Zscore_L, na.rm = TRUE), Avg_Zscore_L = sum(Zscore_L, na.rm = TRUE) / first(num_non_removed_concs),
Avg_Zscore_K = mean(Zscore_K, na.rm = TRUE), Avg_Zscore_K = sum(Zscore_K, na.rm = TRUE) / first(num_non_removed_concs),
Avg_Zscore_r = mean(Zscore_r, na.rm = TRUE), Avg_Zscore_r = sum(Zscore_r, na.rm = TRUE) / (total_conc_num - 1),
Avg_Zscore_AUC = mean(Zscore_AUC, na.rm = TRUE), Avg_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE) / (total_conc_num - 1),
# Interaction Z-scores # Interaction Z-scores
Z_lm_L = first(Z_lm_L), Z_lm_L = first(Z_lm_L),
@@ -369,27 +392,67 @@ calculate_interaction_scores <- function(df, df_bg, group_vars) {
Z_Shift_r = first(Z_Shift_r), Z_Shift_r = first(Z_Shift_r),
Z_Shift_AUC = first(Z_Shift_AUC), Z_Shift_AUC = first(Z_Shift_AUC),
# NG, DB, SM values
NG = first(NG),
DB = first(DB),
SM = first(SM),
# R Squared values # R Squared values
R_Squared_L = first(R_Squared_L), R_Squared_L = first(R_Squared_L),
R_Squared_K = first(R_Squared_K), R_Squared_K = first(R_Squared_K),
R_Squared_r = first(R_Squared_r), R_Squared_r = first(R_Squared_r),
R_Squared_AUC = first(R_Squared_AUC), R_Squared_AUC = first(R_Squared_AUC),
# Include Drug # NG, DB, SM values
Drug = first(Drug), NG = first(NG),
DB = first(DB),
SM = first(SM),
.groups = "drop" .groups = "drop"
) )
# Return the dataframes without creating full_data # Creating the final calculations and interactions dataframes with only required columns for csv output
calculations_df <- calculations %>%
select(
all_of(group_vars),
conc_num, conc_num_factor, conc_num_factor_factor,
N, NG, DB, SM,
mean_L, median_L, sd_L, se_L,
mean_K, median_K, sd_K, se_K,
mean_r, median_r, sd_r, se_r,
mean_AUC, median_AUC, sd_AUC, se_AUC,
Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC,
Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC,
WT_L, WT_K, WT_r, WT_AUC,
WT_sd_L, WT_sd_K, WT_sd_r, WT_sd_AUC,
Exp_L, Exp_K, Exp_r, Exp_AUC,
Delta_L, Delta_K, Delta_r, Delta_AUC,
Zscore_L, Zscore_K, Zscore_r, Zscore_AUC
)
interactions_df <- interactions %>%
select(
all_of(group_vars),
NG, DB, SM,
Avg_Zscore_L, Avg_Zscore_K, Avg_Zscore_r, Avg_Zscore_AUC,
Z_lm_L, Z_lm_K, Z_lm_r, Z_lm_AUC,
Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC,
Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC
)
calculations_no_overlap <- calculations %>%
# DB, NG, SM are same as in interactions, the rest may be different and need to be checked
select(-any_of(c(
"DB", "NG", "SM",
"Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
"Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
"Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC"
)))
# Use left_join to avoid dimension mismatch issues
full_data <- calculations_no_overlap %>%
left_join(interactions, by = group_vars)
# Return full_data and the two required dataframes (calculations and interactions)
return(list( return(list(
calculations = calculations, calculations = calculations_df,
interactions = interactions interactions = interactions_df,
full_data = full_data
)) ))
} }
@@ -475,9 +538,11 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs) {
), color = error_bar_color) ), color = error_bar_color)
} }
} else { } else {
# Original code for calculating from mean and sd # Ensure the mean and sd columns exist
y_mean_col <- paste0("mean_", config$y_var) y_mean_col <- paste0("mean_", config$y_var)
y_sd_col <- paste0("sd_", config$y_var) y_sd_col <- paste0("sd_", config$y_var)
if (y_mean_col %in% colnames(df) && y_sd_col %in% colnames(df)) {
plot <- plot + geom_errorbar(aes( plot <- plot + geom_errorbar(aes(
x = .data[[config$x_var]], x = .data[[config$x_var]],
ymin = .data[[y_mean_col]] - .data[[y_sd_col]], ymin = .data[[y_mean_col]] - .data[[y_sd_col]],
@@ -485,6 +550,7 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs) {
), color = error_bar_color) ), color = error_bar_color)
} }
} }
}
# Convert ggplot to plotly for interactive version # Convert ggplot to plotly for interactive version
plotly_plot <- suppressWarnings(plotly::ggplotly(plot)) plotly_plot <- suppressWarnings(plotly::ggplotly(plot))
@@ -714,7 +780,7 @@ generate_plate_analysis_plot_configs <- function(variables, df_before = NULL, df
return(list(plots = plot_configs)) return(list(plots = plot_configs))
} }
generate_interaction_plot_configs <- function(df, df_calculations, df_interactions, type) { generate_interaction_plot_configs <- function(df, type) {
# Define the y-limits for the plots # Define the y-limits for the plots
limits_map <- list( limits_map <- list(
@@ -762,13 +828,14 @@ generate_interaction_plot_configs <- function(df, df_calculations, df_interactio
) )
plot_config$position <- "jitter" plot_config$position <- "jitter"
# Annotation labels
annotations <- list( annotations <- list(
list(x = 0.25, y = y_limits[1] + 0.1 * y_span, label = "NG ="), # Slightly above y-min list(x = 0, y = y_limits[1] + 0.1 * y_span, label = "NG ="),
list(x = 0.25, y = y_limits[1] + 0.05 * y_span, label = "DB ="), list(x = 0, y = y_limits[1] + 0.05 * y_span, label = "DB ="),
list(x = 0.25, y = y_limits[1], label = "SM =") list(x = 0, y = y_limits[1], label = "SM =")
) )
# Loop over unique x values and add NG, DB, SM values at calculated y positions # Annotation values
for (x_val in unique(df$conc_num_factor_factor)) { for (x_val in unique(df$conc_num_factor_factor)) {
current_df <- df %>% filter(.data[[plot_config$x_var]] == x_val) current_df <- df %>% filter(.data[[plot_config$x_var]] == x_val)
annotations <- append(annotations, list( annotations <- append(annotations, list(
@@ -794,7 +861,6 @@ generate_interaction_plot_configs <- function(df, df_calculations, df_interactio
} }
} }
# Delta interaction plots (use df_calculations and df_interactions)
if (type == "reference") { if (type == "reference") {
group_vars <- c("OrfRep", "Gene", "num") group_vars <- c("OrfRep", "Gene", "num")
} else if (type == "deletion") { } else if (type == "deletion") {
@@ -1195,14 +1261,14 @@ main <- function() {
ss <- calculate_summary_stats(df_na_within_2sd_k, "L", ss <- calculate_summary_stats(df_na_within_2sd_k, "L",
group_vars = c("conc_num"))$summary_stats group_vars = c("conc_num"))$summary_stats
write.csv(ss, write.csv(ss,
file = file.path(out_dir_qc, "max_observed_L_vals_for_spots_within_2sd_K.csv"), file = file.path(out_dir_qc, "max_observed_L_vals_for_spots_within_2SD_K.csv"),
row.names = FALSE) row.names = FALSE)
message("Calculating summary statistics for L outside 2SD of K") message("Calculating summary statistics for L outside 2SD of K")
ss <- calculate_summary_stats(df_na_outside_2sd_k, "L", group_vars = c("conc_num")) ss <- calculate_summary_stats(df_na_outside_2sd_k, "L", group_vars = c("conc_num"))
df_na_l_outside_2sd_k_stats <- ss$df_with_stats df_na_l_outside_2sd_k_stats <- ss$df_with_stats
write.csv(ss$summary_stats, write.csv(ss$summary_stats,
file = file.path(out_dir, "max_observed_L_vals_for_spots_outside_2sd_K.csv"), file = file.path(out_dir, "max_observed_L_vals_for_spots_outside_2SD_K.csv"),
row.names = FALSE) row.names = FALSE)
plate_analysis_plot_configs <- generate_plate_analysis_plot_configs( plate_analysis_plot_configs <- generate_plate_analysis_plot_configs(
@@ -1300,11 +1366,10 @@ main <- function() {
plot_configs = plate_analysis_no_zeros_boxplot_configs), plot_configs = plate_analysis_no_zeros_boxplot_configs),
list(out_dir = out_dir_qc, filename = "L_vs_K_for_strains_2SD_outside_mean_K", list(out_dir = out_dir_qc, filename = "L_vs_K_for_strains_2SD_outside_mean_K",
plot_configs = l_outside_2sd_k_plot_configs), plot_configs = l_outside_2sd_k_plot_configs),
list(out_dir = out_dir_qc, filename = "delta_background_vs_K_for_strains_2sd_outside_mean_K", list(out_dir = out_dir_qc, filename = "delta_background_vs_K_for_strains_2SD_outside_mean_K",
plot_configs = delta_bg_outside_2sd_k_plot_configs) plot_configs = delta_bg_outside_2sd_k_plot_configs)
) )
# Generating quality control plots in parallel
# furrr::future_map(plot_configs, function(config) { # furrr::future_map(plot_configs, function(config) {
# generate_and_save_plots(config$out_dir, config$filename, config$plot_configs) # generate_and_save_plots(config$out_dir, config$filename, config$plot_configs)
# }, .options = furrr_options(seed = TRUE)) # }, .options = furrr_options(seed = TRUE))
@@ -1325,9 +1390,9 @@ main <- function() {
) %>% ) %>%
filter(!is.na(L)) filter(!is.na(L))
message("Calculating summary statistics for background strain") message("Calculating background strain summary statistics")
ss_bg <- calculate_summary_stats(df_bg, c("L", "K", "r", "AUC", "delta_bg"), ss_bg <- calculate_summary_stats(df_bg, c("L", "K", "r", "AUC", "delta_bg"),
group_vars = c("OrfRep", "conc_num")) group_vars = c("OrfRep", "Drug", "conc_num", "conc_num_factor_factor"))
summary_stats_bg <- ss_bg$summary_stats summary_stats_bg <- ss_bg$summary_stats
df_bg_stats <- ss_bg$df_with_stats df_bg_stats <- ss_bg$df_with_stats
write.csv( write.csv(
@@ -1339,7 +1404,7 @@ main <- function() {
df_reference <- df_na_stats %>% # formerly X2_RF df_reference <- df_na_stats %>% # formerly X2_RF
filter(OrfRep == strain) %>% filter(OrfRep == strain) %>%
filter(!is.na(L)) %>% filter(!is.na(L)) %>%
group_by(conc_num) %>% group_by(OrfRep, Drug, conc_num) %>%
mutate( mutate(
max_l_theoretical = max(max_L, na.rm = TRUE), max_l_theoretical = max(max_L, na.rm = TRUE),
L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L), L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L),
@@ -1347,21 +1412,25 @@ main <- function() {
L = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, max_l_theoretical, L)) %>% L = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, max_l_theoretical, L)) %>%
ungroup() ungroup()
message("Calculating reference strain interaction scores") message("Calculating reference strain summary statistics")
df_reference_stats <- calculate_summary_stats( df_reference_stats <- calculate_summary_stats(
df = df_reference, df = df_reference,
variables = c("L", "K", "r", "AUC"), variables = c("L", "K", "r", "AUC"),
group_vars = c("OrfRep", "Gene", "num", "conc_num") group_vars = c("OrfRep", "Gene", "Drug", "num", "conc_num", "conc_num_factor_factor")
)$df_with_stats )$df_with_stats
reference_results <- calculate_interaction_scores(df_reference_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "num")) message("Calculating reference strain interaction scores")
df_calculations_reference <- reference_results$calculations results <- calculate_interaction_scores(df_reference_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "Drug", "num"))
df_interactions_reference <- reference_results$interactions df_calculations_reference <- results$calculations
df_interactions_reference <- results$interactions
df_interactions_reference_joined <- results$full_data
write.csv(df_calculations_reference, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
write.csv(df_interactions_reference, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
message("Setting missing deletion values to the highest theoretical value at each drug conc for L") message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
df_deletion <- df_na_stats %>% # formerly X2 df_deletion <- df_na_stats %>% # formerly X2
filter(OrfRep != strain) %>% filter(OrfRep != strain) %>%
filter(!is.na(L)) %>% filter(!is.na(L)) %>%
group_by(conc_num) %>% group_by(OrfRep, Gene, conc_num) %>%
mutate( mutate(
max_l_theoretical = max(max_L, na.rm = TRUE), max_l_theoretical = max(max_L, na.rm = TRUE),
L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L), L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L),
@@ -1369,81 +1438,60 @@ main <- function() {
L = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, max_l_theoretical, L)) %>% L = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, max_l_theoretical, L)) %>%
ungroup() ungroup()
message("Calculating deletion strain(s) interactions scores") message("Calculating deletion strain(s) summary statistics")
df_deletion_stats <- calculate_summary_stats( df_deletion_stats <- calculate_summary_stats(
df = df_deletion, df = df_deletion,
variables = c("L", "K", "r", "AUC"), variables = c("L", "K", "r", "AUC"),
group_vars = c("OrfRep", "Gene", "conc_num") group_vars = c("OrfRep", "Gene", "Drug", "conc_num", "conc_num_factor_factor")
)$df_with_stats )$df_with_stats
deletion_results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, group_vars = c("OrfRep", "Gene")) message("Calculating deletion strain(s) interactions scores")
df_calculations <- deletion_results$calculations results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "Drug"))
df_interactions <- deletion_results$interactions df_calculations <- results$calculations
df_interactions <- results$interactions
# Writing Z-Scores to file df_interactions_joined <- results$full_data
write.csv(df_calculations_reference, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
write.csv(df_interactions_reference, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
write.csv(df_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE) write.csv(df_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE)
write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE) write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE)
# Create interaction plots # Create interaction plots
message("Generating reference interaction plots") message("Generating reference interaction plots")
reference_plot_configs <- generate_interaction_plot_configs( reference_plot_configs <- generate_interaction_plot_configs(df_interactions_reference_joined, df_bg_stats, "reference")
df_reference_stats, df_calculations_reference, df_interactions_reference, "reference")
generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs) generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs)
message("Generating deletion interaction plots") message("Generating deletion interaction plots")
deletion_plot_configs <- generate_interaction_plot_configs( deletion_plot_configs <- generate_interaction_plot_configs(df_interactions_joined, df_bg_stats, "deletion")
df_deletion_stats, df_calculations, df_interactions, "deletion")
generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs) generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs)
# Define conditions for enhancers and suppressors message("Writing enhancer/suppressor csv files")
# TODO Add to study config? interaction_threshold <- 2 # TODO add to study config?
threshold <- 2 enhancer_condition_L <- df_interactions$Avg_Zscore_L >= interaction_threshold
enhancer_condition_L <- df_interactions$Avg_Zscore_L >= threshold suppressor_condition_L <- df_interactions$Avg_Zscore_L <= -interaction_threshold
suppressor_condition_L <- df_interactions$Avg_Zscore_L <= -threshold enhancer_condition_K <- df_interactions$Avg_Zscore_K >= interaction_threshold
enhancer_condition_K <- df_interactions$Avg_Zscore_K >= threshold suppressor_condition_K <- df_interactions$Avg_Zscore_K <= -interaction_threshold
suppressor_condition_K <- df_interactions$Avg_Zscore_K <= -threshold
# Subset data
enhancers_L <- df_interactions[enhancer_condition_L, ] enhancers_L <- df_interactions[enhancer_condition_L, ]
suppressors_L <- df_interactions[suppressor_condition_L, ] suppressors_L <- df_interactions[suppressor_condition_L, ]
enhancers_K <- df_interactions[enhancer_condition_K, ] enhancers_K <- df_interactions[enhancer_condition_K, ]
suppressors_K <- df_interactions[suppressor_condition_K, ] suppressors_K <- df_interactions[suppressor_condition_K, ]
enhancers_and_suppressors_L <- df_interactions[enhancer_condition_L | suppressor_condition_L, ]
# Save enhancers and suppressors enhancers_and_suppressors_K <- df_interactions[enhancer_condition_K | suppressor_condition_K, ]
message("Writing enhancer/suppressor csv files")
write.csv(enhancers_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_L.csv"), row.names = FALSE) write.csv(enhancers_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_L.csv"), row.names = FALSE)
write.csv(suppressors_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_L.csv"), row.names = FALSE) write.csv(suppressors_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_L.csv"), row.names = FALSE)
write.csv(enhancers_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_K.csv"), row.names = FALSE) write.csv(enhancers_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_K.csv"), row.names = FALSE)
write.csv(suppressors_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_K.csv"), row.names = FALSE) write.csv(suppressors_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_K.csv"), row.names = FALSE)
# Combine conditions for enhancers and suppressors
enhancers_and_suppressors_L <- df_interactions[enhancer_condition_L | suppressor_condition_L, ]
enhancers_and_suppressors_K <- df_interactions[enhancer_condition_K | suppressor_condition_K, ]
# Save combined enhancers and suppressors
write.csv(enhancers_and_suppressors_L, write.csv(enhancers_and_suppressors_L,
file = file.path(out_dir, "zscore_interactions_deletion_enhancers_and_suppressors_L.csv"), row.names = FALSE) file = file.path(out_dir, "zscore_interactions_deletion_enhancers_and_suppressors_L.csv"), row.names = FALSE)
write.csv(enhancers_and_suppressors_K, write.csv(enhancers_and_suppressors_K,
file = file.path(out_dir, "zscore_interaction_deletion_enhancers_and_suppressors_K.csv"), row.names = FALSE) file = file.path(out_dir, "zscore_interaction_deletion_enhancers_and_suppressors_K.csv"), row.names = FALSE)
# Handle linear model based enhancers and suppressors
lm_threshold <- 2 # TODO add to study config?
enhancers_lm_L <- df_interactions[df_interactions$Z_lm_L >= lm_threshold, ]
suppressors_lm_L <- df_interactions[df_interactions$Z_lm_L <= -lm_threshold, ]
enhancers_lm_K <- df_interactions[df_interactions$Z_lm_K >= lm_threshold, ]
suppressors_lm_K <- df_interactions[df_interactions$Z_lm_K <= -lm_threshold, ]
# Save linear model based enhancers and suppressors
message("Writing linear model enhancer/suppressor csv files") message("Writing linear model enhancer/suppressor csv files")
write.csv(enhancers_lm_L, lm_interaction_threshold <- 2 # TODO add to study config?
file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_L.csv"), row.names = FALSE) enhancers_lm_L <- df_interactions[df_interactions$Z_lm_L >= lm_interaction_threshold, ]
write.csv(suppressors_lm_L, suppressors_lm_L <- df_interactions[df_interactions$Z_lm_L <= -lm_interaction_threshold, ]
file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_L.csv"), row.names = FALSE) enhancers_lm_K <- df_interactions[df_interactions$Z_lm_K >= lm_interaction_threshold, ]
write.csv(enhancers_lm_K, suppressors_lm_K <- df_interactions[df_interactions$Z_lm_K <= -lm_interaction_threshold, ]
file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_K.csv"), row.names = FALSE) write.csv(enhancers_lm_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_L.csv"), row.names = FALSE)
write.csv(suppressors_lm_K, write.csv(suppressors_lm_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_L.csv"), row.names = FALSE)
file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_K.csv"), row.names = FALSE) write.csv(enhancers_lm_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_K.csv"), row.names = FALSE)
write.csv(suppressors_lm_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_K.csv"), row.names = FALSE)
message("Generating rank plots") message("Generating rank plots")
rank_plot_configs <- generate_rank_plot_configs( rank_plot_configs <- generate_rank_plot_configs(
@@ -1463,7 +1511,7 @@ main <- function() {
generate_and_save_plots(out_dir = out_dir, filename = "rank_plots_lm", generate_and_save_plots(out_dir = out_dir, filename = "rank_plots_lm",
plot_configs = rank_lm_plot_configs) plot_configs = rank_lm_plot_configs)
overlap_threshold <- 2 overlap_threshold <- 2 # TODO add to study config?
df_interactions_filtered <- df_interactions %>% df_interactions_filtered <- df_interactions %>%
filter(!is.na(Z_lm_L) & !is.na(Avg_Zscore_L)) %>% filter(!is.na(Z_lm_L) & !is.na(Avg_Zscore_L)) %>%
mutate( mutate(