Fix interaction groupings
This commit is contained in:
@@ -163,22 +163,18 @@ load_and_filter_data <- function(easy_results_file, sd = 3) {
|
|||||||
return(df)
|
return(df)
|
||||||
}
|
}
|
||||||
|
|
||||||
# Update Gene names using the SGD gene list
|
|
||||||
update_gene_names <- function(df, sgd_gene_list) {
|
update_gene_names <- function(df, sgd_gene_list) {
|
||||||
# Load SGD gene list
|
genes <- read.delim(file = sgd_gene_list, quote = "", header = FALSE,
|
||||||
genes <- read.delim(file = sgd_gene_list,
|
|
||||||
quote = "", header = FALSE,
|
|
||||||
colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11)))
|
colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11)))
|
||||||
|
|
||||||
# Create a named vector for mapping ORF to GeneName
|
gene_map <- setNames(genes$V5, genes$V4) # ORF to GeneName mapping
|
||||||
gene_map <- setNames(genes$V5, genes$V4)
|
|
||||||
# Vectorized match to find the GeneName from gene_map
|
|
||||||
mapped_genes <- gene_map[df$ORF]
|
|
||||||
# Replace NAs in mapped_genes with original Gene names (preserves existing Gene names if ORF is not found)
|
|
||||||
updated_genes <- ifelse(is.na(mapped_genes) | df$OrfRep == "YDL227C", df$Gene, mapped_genes)
|
|
||||||
# Ensure Gene is not left blank or incorrectly updated to "OCT1"
|
|
||||||
df <- df %>%
|
df <- df %>%
|
||||||
mutate(Gene = ifelse(updated_genes == "" | updated_genes == "OCT1", OrfRep, updated_genes))
|
mutate(
|
||||||
|
mapped_genes = gene_map[ORF],
|
||||||
|
Gene = if_else(is.na(mapped_genes) | OrfRep == "YDL227C", Gene, mapped_genes),
|
||||||
|
Gene = if_else(Gene == "" | Gene == "OCT1", OrfRep, Gene) # Handle invalid names
|
||||||
|
)
|
||||||
|
|
||||||
return(df)
|
return(df)
|
||||||
}
|
}
|
||||||
@@ -203,61 +199,82 @@ calculate_summary_stats <- function(df, variables, group_vars) {
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Create a cleaned version of df that doesn't overlap with summary_stats
|
# Create a cleaned version of df that doesn't overlap with summary_stats
|
||||||
cleaned_df <- df %>%
|
df_cleaned <- df %>%
|
||||||
select(-any_of(setdiff(intersect(names(df), names(summary_stats)), group_vars)))
|
select(-any_of(setdiff(intersect(names(df), names(summary_stats)), group_vars)))
|
||||||
|
|
||||||
df_joined <- left_join(cleaned_df, summary_stats, by = group_vars)
|
df_joined <- left_join(df_cleaned, summary_stats, by = group_vars)
|
||||||
|
|
||||||
return(list(summary_stats = summary_stats, df_with_stats = df_joined))
|
return(list(summary_stats = summary_stats, df_with_stats = df_joined))
|
||||||
}
|
}
|
||||||
|
|
||||||
calculate_interaction_scores <- function(df, df_bg, group_vars) {
|
calculate_interaction_scores <- function(df, df_bg, group_vars, overlap_threshold = 2) {
|
||||||
|
|
||||||
max_conc <- max(as.numeric(df$conc_num_factor), na.rm = TRUE)
|
max_conc <- max(as.numeric(df$conc_num_factor), na.rm = TRUE)
|
||||||
|
total_conc_num <- length(unique(df$conc_num))
|
||||||
|
|
||||||
# Include background statistics per concentration
|
# Calculate WT statistics from df_bg
|
||||||
bg_stats <- df_bg %>%
|
wt_stats <- df_bg %>%
|
||||||
group_by(conc_num, conc_num_factor) %>%
|
filter(conc_num == 0) %>% # use the zero drug concentration background
|
||||||
summarise(
|
summarise(
|
||||||
WT_L = first(mean_L),
|
WT_L = mean(mean_L, na.rm = TRUE),
|
||||||
WT_K = first(mean_K),
|
WT_sd_L = mean(sd_L, na.rm = TRUE),
|
||||||
WT_r = first(mean_r),
|
WT_K = mean(mean_K, na.rm = TRUE),
|
||||||
WT_AUC = first(mean_AUC),
|
WT_sd_K = mean(sd_K, na.rm = TRUE),
|
||||||
WT_sd_L = first(sd_L),
|
WT_r = mean(mean_r, na.rm = TRUE),
|
||||||
WT_sd_K = first(sd_K),
|
WT_sd_r = mean(sd_r, na.rm = TRUE),
|
||||||
WT_sd_r = first(sd_r),
|
WT_AUC = mean(mean_AUC, na.rm = TRUE),
|
||||||
WT_sd_AUC = first(sd_AUC),
|
WT_sd_AUC = mean(sd_AUC, na.rm = TRUE)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Add WT statistics to df
|
||||||
|
df <- df %>%
|
||||||
|
mutate(
|
||||||
|
WT_L = wt_stats$WT_L,
|
||||||
|
WT_sd_L = wt_stats$WT_sd_L,
|
||||||
|
WT_K = wt_stats$WT_K,
|
||||||
|
WT_sd_K = wt_stats$WT_sd_K,
|
||||||
|
WT_r = wt_stats$WT_r,
|
||||||
|
WT_sd_r = wt_stats$WT_sd_r,
|
||||||
|
WT_AUC = wt_stats$WT_AUC,
|
||||||
|
WT_sd_AUC = wt_stats$WT_sd_AUC
|
||||||
|
)
|
||||||
|
|
||||||
|
# Compute mean values at zero concentration
|
||||||
|
mean_L_zero_df <- df %>%
|
||||||
|
filter(conc_num == 0) %>%
|
||||||
|
group_by(across(all_of(group_vars))) %>%
|
||||||
|
summarise(
|
||||||
|
mean_L_zero = mean(mean_L, na.rm = TRUE),
|
||||||
|
mean_K_zero = mean(mean_K, na.rm = TRUE),
|
||||||
|
mean_r_zero = mean(mean_r, na.rm = TRUE),
|
||||||
|
mean_AUC_zero = mean(mean_AUC, na.rm = TRUE),
|
||||||
.groups = "drop"
|
.groups = "drop"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Calculate total number of concentrations
|
# Join mean_L_zero_df to df
|
||||||
total_conc_num <- length(unique(df$conc_num))
|
df <- df %>%
|
||||||
|
left_join(mean_L_zero_df, by = group_vars)
|
||||||
|
|
||||||
# Join background statistics to df
|
# Calculate Raw Shifts and Z Shifts
|
||||||
calculations <- df %>%
|
df <- df %>%
|
||||||
left_join(bg_stats, by = c("conc_num", "conc_num_factor"))
|
|
||||||
|
|
||||||
# Perform calculations
|
|
||||||
calculations <- calculations %>%
|
|
||||||
group_by(across(all_of(group_vars))) %>%
|
|
||||||
mutate(
|
mutate(
|
||||||
N = n(),
|
Raw_Shift_L = mean_L_zero - WT_L,
|
||||||
NG = sum(NG, na.rm = TRUE),
|
Raw_Shift_K = mean_K_zero - WT_K,
|
||||||
DB = sum(DB, na.rm = TRUE),
|
Raw_Shift_r = mean_r_zero - WT_r,
|
||||||
SM = sum(SM, na.rm = TRUE),
|
Raw_Shift_AUC = mean_AUC_zero - WT_AUC,
|
||||||
num_non_removed_concs = n_distinct(conc_num[DB != 1]),
|
|
||||||
|
|
||||||
# Raw shifts
|
|
||||||
Raw_Shift_L = mean_L - WT_L,
|
|
||||||
Raw_Shift_K = mean_K - WT_K,
|
|
||||||
Raw_Shift_r = mean_r - WT_r,
|
|
||||||
Raw_Shift_AUC = mean_AUC - WT_AUC,
|
|
||||||
|
|
||||||
# Z shifts
|
|
||||||
Z_Shift_L = Raw_Shift_L / WT_sd_L,
|
Z_Shift_L = Raw_Shift_L / WT_sd_L,
|
||||||
Z_Shift_K = Raw_Shift_K / WT_sd_K,
|
Z_Shift_K = Raw_Shift_K / WT_sd_K,
|
||||||
Z_Shift_r = Raw_Shift_r / WT_sd_r,
|
Z_Shift_r = Raw_Shift_r / WT_sd_r,
|
||||||
Z_Shift_AUC = Raw_Shift_AUC / WT_sd_AUC,
|
Z_Shift_AUC = Raw_Shift_AUC / WT_sd_AUC
|
||||||
|
)
|
||||||
|
|
||||||
|
calculations <- df %>%
|
||||||
|
group_by(across(all_of(group_vars))) %>%
|
||||||
|
mutate(
|
||||||
|
NG_sum = sum(NG, na.rm = TRUE),
|
||||||
|
DB_sum = sum(DB, na.rm = TRUE),
|
||||||
|
SM_sum = sum(SM, na.rm = TRUE),
|
||||||
|
num_non_removed_concs = total_conc_num - sum(DB, na.rm = TRUE) - 1,
|
||||||
|
|
||||||
# Expected values
|
# Expected values
|
||||||
Exp_L = WT_L + Raw_Shift_L,
|
Exp_L = WT_L + Raw_Shift_L,
|
||||||
@@ -271,38 +288,32 @@ calculate_interaction_scores <- function(df, df_bg, group_vars) {
|
|||||||
Delta_r = mean_r - Exp_r,
|
Delta_r = mean_r - Exp_r,
|
||||||
Delta_AUC = mean_AUC - Exp_AUC,
|
Delta_AUC = mean_AUC - Exp_AUC,
|
||||||
|
|
||||||
# Adjust Deltas for NG and SM
|
# Adjust deltas for NG and SM
|
||||||
Delta_L = if_else(NG == 1, mean_L - WT_L, Delta_L),
|
Delta_L = if_else(NG == 1, mean_L - WT_L, Delta_L),
|
||||||
Delta_K = if_else(NG == 1, mean_K - WT_K, Delta_K),
|
Delta_K = if_else(NG == 1, mean_K - WT_K, Delta_K),
|
||||||
Delta_r = if_else(NG == 1, mean_r - WT_r, Delta_r),
|
Delta_r = if_else(NG == 1, mean_r - WT_r, Delta_r),
|
||||||
Delta_AUC = if_else(NG == 1, mean_AUC - WT_AUC, Delta_AUC),
|
Delta_AUC = if_else(NG == 1, mean_AUC - WT_AUC, Delta_AUC),
|
||||||
Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L),
|
Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L),
|
||||||
|
|
||||||
# Z-scores
|
# Calculate Z-scores
|
||||||
Zscore_L = Delta_L / WT_sd_L,
|
Zscore_L = Delta_L / WT_sd_L,
|
||||||
Zscore_K = Delta_K / WT_sd_K,
|
Zscore_K = Delta_K / WT_sd_K,
|
||||||
Zscore_r = Delta_r / WT_sd_r,
|
Zscore_r = Delta_r / WT_sd_r,
|
||||||
Zscore_AUC = Delta_AUC / WT_sd_AUC
|
Zscore_AUC = Delta_AUC / WT_sd_AUC
|
||||||
) %>%
|
) %>%
|
||||||
ungroup()
|
|
||||||
|
|
||||||
# Fit linear models within each group
|
|
||||||
calculations <- calculations %>%
|
|
||||||
group_by(across(all_of(group_vars))) %>%
|
|
||||||
group_modify(~ {
|
group_modify(~ {
|
||||||
data <- .x
|
# Perform linear models
|
||||||
# Fit linear models
|
lm_L <- lm(Delta_L ~ conc_num_factor, data = .x)
|
||||||
lm_L <- lm(Delta_L ~ conc_num_factor, data = data)
|
lm_K <- lm(Delta_K ~ conc_num_factor, data = .x)
|
||||||
lm_K <- lm(Delta_K ~ conc_num_factor, data = data)
|
lm_r <- lm(Delta_r ~ conc_num_factor, data = .x)
|
||||||
lm_r <- lm(Delta_r ~ conc_num_factor, data = data)
|
lm_AUC <- lm(Delta_AUC ~ conc_num_factor, data = .x)
|
||||||
lm_AUC <- lm(Delta_AUC ~ conc_num_factor, data = data)
|
|
||||||
data <- data %>%
|
.x %>%
|
||||||
mutate(
|
mutate(
|
||||||
lm_intercept_L = coef(lm_L)[1],
|
lm_intercept_L = coef(lm_L)[1],
|
||||||
lm_slope_L = coef(lm_L)[2],
|
lm_slope_L = coef(lm_L)[2],
|
||||||
R_Squared_L = summary(lm_L)$r.squared,
|
R_Squared_L = summary(lm_L)$r.squared,
|
||||||
lm_Score_L = max_conc * lm_slope_L + lm_intercept_L,
|
lm_Score_L = max_conc * lm_slope_L + lm_intercept_L,
|
||||||
# Repeat for K, r, and AUC
|
|
||||||
lm_intercept_K = coef(lm_K)[1],
|
lm_intercept_K = coef(lm_K)[1],
|
||||||
lm_slope_K = coef(lm_K)[2],
|
lm_slope_K = coef(lm_K)[2],
|
||||||
R_Squared_K = summary(lm_K)$r.squared,
|
R_Squared_K = summary(lm_K)$r.squared,
|
||||||
@@ -316,11 +327,10 @@ calculate_interaction_scores <- function(df, df_bg, group_vars) {
|
|||||||
R_Squared_AUC = summary(lm_AUC)$r.squared,
|
R_Squared_AUC = summary(lm_AUC)$r.squared,
|
||||||
lm_Score_AUC = max_conc * lm_slope_AUC + lm_intercept_AUC
|
lm_Score_AUC = max_conc * lm_slope_AUC + lm_intercept_AUC
|
||||||
)
|
)
|
||||||
return(data)
|
|
||||||
}) %>%
|
}) %>%
|
||||||
ungroup()
|
ungroup()
|
||||||
|
|
||||||
# Compute lm means and sds across all data without grouping
|
# Summary statistics for lm scores
|
||||||
lm_means_sds <- calculations %>%
|
lm_means_sds <- calculations %>%
|
||||||
summarise(
|
summarise(
|
||||||
lm_mean_L = mean(lm_Score_L, na.rm = TRUE),
|
lm_mean_L = mean(lm_Score_L, na.rm = TRUE),
|
||||||
@@ -330,26 +340,39 @@ calculate_interaction_scores <- function(df, df_bg, group_vars) {
|
|||||||
lm_mean_r = mean(lm_Score_r, na.rm = TRUE),
|
lm_mean_r = mean(lm_Score_r, na.rm = TRUE),
|
||||||
lm_sd_r = sd(lm_Score_r, na.rm = TRUE),
|
lm_sd_r = sd(lm_Score_r, na.rm = TRUE),
|
||||||
lm_mean_AUC = mean(lm_Score_AUC, na.rm = TRUE),
|
lm_mean_AUC = mean(lm_Score_AUC, na.rm = TRUE),
|
||||||
lm_sd_AUC = sd(lm_Score_AUC, na.rm = TRUE)
|
lm_sd_AUC = sd(lm_Score_AUC, na.rm = TRUE),
|
||||||
|
.groups = "drop"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Apply global lm means and sds to calculate Z_lm_*
|
|
||||||
calculations <- calculations %>%
|
calculations <- calculations %>%
|
||||||
mutate(
|
mutate(
|
||||||
Z_lm_L = (lm_Score_L - lm_means_sds$lm_mean_L) / lm_means_sds$lm_sd_L,
|
lm_mean_L = lm_means_sds$lm_mean_L,
|
||||||
Z_lm_K = (lm_Score_K - lm_means_sds$lm_mean_K) / lm_means_sds$lm_sd_K,
|
lm_sd_L = lm_means_sds$lm_sd_L,
|
||||||
Z_lm_r = (lm_Score_r - lm_means_sds$lm_mean_r) / lm_means_sds$lm_sd_r,
|
lm_mean_K = lm_means_sds$lm_mean_K,
|
||||||
Z_lm_AUC = (lm_Score_AUC - lm_means_sds$lm_mean_AUC) / lm_means_sds$lm_sd_AUC
|
lm_sd_K = lm_means_sds$lm_sd_K,
|
||||||
|
lm_mean_r = lm_means_sds$lm_mean_r,
|
||||||
|
lm_sd_r = lm_means_sds$lm_sd_r,
|
||||||
|
lm_mean_AUC = lm_means_sds$lm_mean_AUC,
|
||||||
|
lm_sd_AUC = lm_means_sds$lm_sd_AUC
|
||||||
)
|
)
|
||||||
|
|
||||||
# Build interactions data frame
|
# Continue with gene Z-scores and interactions
|
||||||
|
calculations <- calculations %>%
|
||||||
|
mutate(
|
||||||
|
Z_lm_L = (lm_Score_L - lm_mean_L) / lm_sd_L,
|
||||||
|
Z_lm_K = (lm_Score_K - lm_mean_K) / lm_sd_K,
|
||||||
|
Z_lm_r = (lm_Score_r - lm_mean_r) / lm_sd_r,
|
||||||
|
Z_lm_AUC = (lm_Score_AUC - lm_mean_AUC) / lm_sd_AUC
|
||||||
|
)
|
||||||
|
|
||||||
|
# Build summary stats (interactions)
|
||||||
interactions <- calculations %>%
|
interactions <- calculations %>%
|
||||||
group_by(across(all_of(group_vars))) %>%
|
group_by(across(all_of(group_vars))) %>%
|
||||||
summarise(
|
summarise(
|
||||||
Avg_Zscore_L = mean(Zscore_L, na.rm = TRUE),
|
Avg_Zscore_L = sum(Zscore_L, na.rm = TRUE) / first(num_non_removed_concs),
|
||||||
Avg_Zscore_K = mean(Zscore_K, na.rm = TRUE),
|
Avg_Zscore_K = sum(Zscore_K, na.rm = TRUE) / first(num_non_removed_concs),
|
||||||
Avg_Zscore_r = mean(Zscore_r, na.rm = TRUE),
|
Avg_Zscore_r = sum(Zscore_r, na.rm = TRUE) / (total_conc_num - 1),
|
||||||
Avg_Zscore_AUC = mean(Zscore_AUC, na.rm = TRUE),
|
Avg_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE) / (total_conc_num - 1),
|
||||||
|
|
||||||
# Interaction Z-scores
|
# Interaction Z-scores
|
||||||
Z_lm_L = first(Z_lm_L),
|
Z_lm_L = first(Z_lm_L),
|
||||||
@@ -369,27 +392,67 @@ calculate_interaction_scores <- function(df, df_bg, group_vars) {
|
|||||||
Z_Shift_r = first(Z_Shift_r),
|
Z_Shift_r = first(Z_Shift_r),
|
||||||
Z_Shift_AUC = first(Z_Shift_AUC),
|
Z_Shift_AUC = first(Z_Shift_AUC),
|
||||||
|
|
||||||
# NG, DB, SM values
|
|
||||||
NG = first(NG),
|
|
||||||
DB = first(DB),
|
|
||||||
SM = first(SM),
|
|
||||||
|
|
||||||
# R Squared values
|
# R Squared values
|
||||||
R_Squared_L = first(R_Squared_L),
|
R_Squared_L = first(R_Squared_L),
|
||||||
R_Squared_K = first(R_Squared_K),
|
R_Squared_K = first(R_Squared_K),
|
||||||
R_Squared_r = first(R_Squared_r),
|
R_Squared_r = first(R_Squared_r),
|
||||||
R_Squared_AUC = first(R_Squared_AUC),
|
R_Squared_AUC = first(R_Squared_AUC),
|
||||||
|
|
||||||
# Include Drug
|
# NG, DB, SM values
|
||||||
Drug = first(Drug),
|
NG = first(NG),
|
||||||
|
DB = first(DB),
|
||||||
|
SM = first(SM),
|
||||||
|
|
||||||
.groups = "drop"
|
.groups = "drop"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Return the dataframes without creating full_data
|
# Creating the final calculations and interactions dataframes with only required columns for csv output
|
||||||
|
calculations_df <- calculations %>%
|
||||||
|
select(
|
||||||
|
all_of(group_vars),
|
||||||
|
conc_num, conc_num_factor, conc_num_factor_factor,
|
||||||
|
N, NG, DB, SM,
|
||||||
|
mean_L, median_L, sd_L, se_L,
|
||||||
|
mean_K, median_K, sd_K, se_K,
|
||||||
|
mean_r, median_r, sd_r, se_r,
|
||||||
|
mean_AUC, median_AUC, sd_AUC, se_AUC,
|
||||||
|
Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC,
|
||||||
|
Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC,
|
||||||
|
WT_L, WT_K, WT_r, WT_AUC,
|
||||||
|
WT_sd_L, WT_sd_K, WT_sd_r, WT_sd_AUC,
|
||||||
|
Exp_L, Exp_K, Exp_r, Exp_AUC,
|
||||||
|
Delta_L, Delta_K, Delta_r, Delta_AUC,
|
||||||
|
Zscore_L, Zscore_K, Zscore_r, Zscore_AUC
|
||||||
|
)
|
||||||
|
|
||||||
|
interactions_df <- interactions %>%
|
||||||
|
select(
|
||||||
|
all_of(group_vars),
|
||||||
|
NG, DB, SM,
|
||||||
|
Avg_Zscore_L, Avg_Zscore_K, Avg_Zscore_r, Avg_Zscore_AUC,
|
||||||
|
Z_lm_L, Z_lm_K, Z_lm_r, Z_lm_AUC,
|
||||||
|
Raw_Shift_L, Raw_Shift_K, Raw_Shift_r, Raw_Shift_AUC,
|
||||||
|
Z_Shift_L, Z_Shift_K, Z_Shift_r, Z_Shift_AUC
|
||||||
|
)
|
||||||
|
|
||||||
|
calculations_no_overlap <- calculations %>%
|
||||||
|
# DB, NG, SM are same as in interactions, the rest may be different and need to be checked
|
||||||
|
select(-any_of(c(
|
||||||
|
"DB", "NG", "SM",
|
||||||
|
"Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
|
||||||
|
"Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
|
||||||
|
"Z_lm_L", "Z_lm_K", "Z_lm_r", "Z_lm_AUC"
|
||||||
|
)))
|
||||||
|
|
||||||
|
# Use left_join to avoid dimension mismatch issues
|
||||||
|
full_data <- calculations_no_overlap %>%
|
||||||
|
left_join(interactions, by = group_vars)
|
||||||
|
|
||||||
|
# Return full_data and the two required dataframes (calculations and interactions)
|
||||||
return(list(
|
return(list(
|
||||||
calculations = calculations,
|
calculations = calculations_df,
|
||||||
interactions = interactions
|
interactions = interactions_df,
|
||||||
|
full_data = full_data
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -475,14 +538,17 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs) {
|
|||||||
), color = error_bar_color)
|
), color = error_bar_color)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
# Original code for calculating from mean and sd
|
# Ensure the mean and sd columns exist
|
||||||
y_mean_col <- paste0("mean_", config$y_var)
|
y_mean_col <- paste0("mean_", config$y_var)
|
||||||
y_sd_col <- paste0("sd_", config$y_var)
|
y_sd_col <- paste0("sd_", config$y_var)
|
||||||
plot <- plot + geom_errorbar(aes(
|
|
||||||
x = .data[[config$x_var]],
|
if (y_mean_col %in% colnames(df) && y_sd_col %in% colnames(df)) {
|
||||||
ymin = .data[[y_mean_col]] - .data[[y_sd_col]],
|
plot <- plot + geom_errorbar(aes(
|
||||||
ymax = .data[[y_mean_col]] + .data[[y_sd_col]]
|
x = .data[[config$x_var]],
|
||||||
), color = error_bar_color)
|
ymin = .data[[y_mean_col]] - .data[[y_sd_col]],
|
||||||
|
ymax = .data[[y_mean_col]] + .data[[y_sd_col]]
|
||||||
|
), color = error_bar_color)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -714,7 +780,7 @@ generate_plate_analysis_plot_configs <- function(variables, df_before = NULL, df
|
|||||||
return(list(plots = plot_configs))
|
return(list(plots = plot_configs))
|
||||||
}
|
}
|
||||||
|
|
||||||
generate_interaction_plot_configs <- function(df, df_calculations, df_interactions, type) {
|
generate_interaction_plot_configs <- function(df, type) {
|
||||||
|
|
||||||
# Define the y-limits for the plots
|
# Define the y-limits for the plots
|
||||||
limits_map <- list(
|
limits_map <- list(
|
||||||
@@ -762,13 +828,14 @@ generate_interaction_plot_configs <- function(df, df_calculations, df_interactio
|
|||||||
)
|
)
|
||||||
plot_config$position <- "jitter"
|
plot_config$position <- "jitter"
|
||||||
|
|
||||||
|
# Annotation labels
|
||||||
annotations <- list(
|
annotations <- list(
|
||||||
list(x = 0.25, y = y_limits[1] + 0.1 * y_span, label = "NG ="), # Slightly above y-min
|
list(x = 0, y = y_limits[1] + 0.1 * y_span, label = "NG ="),
|
||||||
list(x = 0.25, y = y_limits[1] + 0.05 * y_span, label = "DB ="),
|
list(x = 0, y = y_limits[1] + 0.05 * y_span, label = "DB ="),
|
||||||
list(x = 0.25, y = y_limits[1], label = "SM =")
|
list(x = 0, y = y_limits[1], label = "SM =")
|
||||||
)
|
)
|
||||||
|
|
||||||
# Loop over unique x values and add NG, DB, SM values at calculated y positions
|
# Annotation values
|
||||||
for (x_val in unique(df$conc_num_factor_factor)) {
|
for (x_val in unique(df$conc_num_factor_factor)) {
|
||||||
current_df <- df %>% filter(.data[[plot_config$x_var]] == x_val)
|
current_df <- df %>% filter(.data[[plot_config$x_var]] == x_val)
|
||||||
annotations <- append(annotations, list(
|
annotations <- append(annotations, list(
|
||||||
@@ -794,7 +861,6 @@ generate_interaction_plot_configs <- function(df, df_calculations, df_interactio
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# Delta interaction plots (use df_calculations and df_interactions)
|
|
||||||
if (type == "reference") {
|
if (type == "reference") {
|
||||||
group_vars <- c("OrfRep", "Gene", "num")
|
group_vars <- c("OrfRep", "Gene", "num")
|
||||||
} else if (type == "deletion") {
|
} else if (type == "deletion") {
|
||||||
@@ -1195,14 +1261,14 @@ main <- function() {
|
|||||||
ss <- calculate_summary_stats(df_na_within_2sd_k, "L",
|
ss <- calculate_summary_stats(df_na_within_2sd_k, "L",
|
||||||
group_vars = c("conc_num"))$summary_stats
|
group_vars = c("conc_num"))$summary_stats
|
||||||
write.csv(ss,
|
write.csv(ss,
|
||||||
file = file.path(out_dir_qc, "max_observed_L_vals_for_spots_within_2sd_K.csv"),
|
file = file.path(out_dir_qc, "max_observed_L_vals_for_spots_within_2SD_K.csv"),
|
||||||
row.names = FALSE)
|
row.names = FALSE)
|
||||||
|
|
||||||
message("Calculating summary statistics for L outside 2SD of K")
|
message("Calculating summary statistics for L outside 2SD of K")
|
||||||
ss <- calculate_summary_stats(df_na_outside_2sd_k, "L", group_vars = c("conc_num"))
|
ss <- calculate_summary_stats(df_na_outside_2sd_k, "L", group_vars = c("conc_num"))
|
||||||
df_na_l_outside_2sd_k_stats <- ss$df_with_stats
|
df_na_l_outside_2sd_k_stats <- ss$df_with_stats
|
||||||
write.csv(ss$summary_stats,
|
write.csv(ss$summary_stats,
|
||||||
file = file.path(out_dir, "max_observed_L_vals_for_spots_outside_2sd_K.csv"),
|
file = file.path(out_dir, "max_observed_L_vals_for_spots_outside_2SD_K.csv"),
|
||||||
row.names = FALSE)
|
row.names = FALSE)
|
||||||
|
|
||||||
plate_analysis_plot_configs <- generate_plate_analysis_plot_configs(
|
plate_analysis_plot_configs <- generate_plate_analysis_plot_configs(
|
||||||
@@ -1300,11 +1366,10 @@ main <- function() {
|
|||||||
plot_configs = plate_analysis_no_zeros_boxplot_configs),
|
plot_configs = plate_analysis_no_zeros_boxplot_configs),
|
||||||
list(out_dir = out_dir_qc, filename = "L_vs_K_for_strains_2SD_outside_mean_K",
|
list(out_dir = out_dir_qc, filename = "L_vs_K_for_strains_2SD_outside_mean_K",
|
||||||
plot_configs = l_outside_2sd_k_plot_configs),
|
plot_configs = l_outside_2sd_k_plot_configs),
|
||||||
list(out_dir = out_dir_qc, filename = "delta_background_vs_K_for_strains_2sd_outside_mean_K",
|
list(out_dir = out_dir_qc, filename = "delta_background_vs_K_for_strains_2SD_outside_mean_K",
|
||||||
plot_configs = delta_bg_outside_2sd_k_plot_configs)
|
plot_configs = delta_bg_outside_2sd_k_plot_configs)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Generating quality control plots in parallel
|
|
||||||
# furrr::future_map(plot_configs, function(config) {
|
# furrr::future_map(plot_configs, function(config) {
|
||||||
# generate_and_save_plots(config$out_dir, config$filename, config$plot_configs)
|
# generate_and_save_plots(config$out_dir, config$filename, config$plot_configs)
|
||||||
# }, .options = furrr_options(seed = TRUE))
|
# }, .options = furrr_options(seed = TRUE))
|
||||||
@@ -1325,9 +1390,9 @@ main <- function() {
|
|||||||
) %>%
|
) %>%
|
||||||
filter(!is.na(L))
|
filter(!is.na(L))
|
||||||
|
|
||||||
message("Calculating summary statistics for background strain")
|
message("Calculating background strain summary statistics")
|
||||||
ss_bg <- calculate_summary_stats(df_bg, c("L", "K", "r", "AUC", "delta_bg"),
|
ss_bg <- calculate_summary_stats(df_bg, c("L", "K", "r", "AUC", "delta_bg"),
|
||||||
group_vars = c("OrfRep", "conc_num"))
|
group_vars = c("OrfRep", "Drug", "conc_num", "conc_num_factor_factor"))
|
||||||
summary_stats_bg <- ss_bg$summary_stats
|
summary_stats_bg <- ss_bg$summary_stats
|
||||||
df_bg_stats <- ss_bg$df_with_stats
|
df_bg_stats <- ss_bg$df_with_stats
|
||||||
write.csv(
|
write.csv(
|
||||||
@@ -1339,7 +1404,7 @@ main <- function() {
|
|||||||
df_reference <- df_na_stats %>% # formerly X2_RF
|
df_reference <- df_na_stats %>% # formerly X2_RF
|
||||||
filter(OrfRep == strain) %>%
|
filter(OrfRep == strain) %>%
|
||||||
filter(!is.na(L)) %>%
|
filter(!is.na(L)) %>%
|
||||||
group_by(conc_num) %>%
|
group_by(OrfRep, Drug, conc_num) %>%
|
||||||
mutate(
|
mutate(
|
||||||
max_l_theoretical = max(max_L, na.rm = TRUE),
|
max_l_theoretical = max(max_L, na.rm = TRUE),
|
||||||
L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L),
|
L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L),
|
||||||
@@ -1347,21 +1412,25 @@ main <- function() {
|
|||||||
L = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, max_l_theoretical, L)) %>%
|
L = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, max_l_theoretical, L)) %>%
|
||||||
ungroup()
|
ungroup()
|
||||||
|
|
||||||
message("Calculating reference strain interaction scores")
|
message("Calculating reference strain summary statistics")
|
||||||
df_reference_stats <- calculate_summary_stats(
|
df_reference_stats <- calculate_summary_stats(
|
||||||
df = df_reference,
|
df = df_reference,
|
||||||
variables = c("L", "K", "r", "AUC"),
|
variables = c("L", "K", "r", "AUC"),
|
||||||
group_vars = c("OrfRep", "Gene", "num", "conc_num")
|
group_vars = c("OrfRep", "Gene", "Drug", "num", "conc_num", "conc_num_factor_factor")
|
||||||
)$df_with_stats
|
)$df_with_stats
|
||||||
reference_results <- calculate_interaction_scores(df_reference_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "num"))
|
message("Calculating reference strain interaction scores")
|
||||||
df_calculations_reference <- reference_results$calculations
|
results <- calculate_interaction_scores(df_reference_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "Drug", "num"))
|
||||||
df_interactions_reference <- reference_results$interactions
|
df_calculations_reference <- results$calculations
|
||||||
|
df_interactions_reference <- results$interactions
|
||||||
|
df_interactions_reference_joined <- results$full_data
|
||||||
|
write.csv(df_calculations_reference, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
|
||||||
|
write.csv(df_interactions_reference, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
|
||||||
|
|
||||||
message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
|
message("Setting missing deletion values to the highest theoretical value at each drug conc for L")
|
||||||
df_deletion <- df_na_stats %>% # formerly X2
|
df_deletion <- df_na_stats %>% # formerly X2
|
||||||
filter(OrfRep != strain) %>%
|
filter(OrfRep != strain) %>%
|
||||||
filter(!is.na(L)) %>%
|
filter(!is.na(L)) %>%
|
||||||
group_by(conc_num) %>%
|
group_by(OrfRep, Gene, conc_num) %>%
|
||||||
mutate(
|
mutate(
|
||||||
max_l_theoretical = max(max_L, na.rm = TRUE),
|
max_l_theoretical = max(max_L, na.rm = TRUE),
|
||||||
L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L),
|
L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L),
|
||||||
@@ -1369,81 +1438,60 @@ main <- function() {
|
|||||||
L = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, max_l_theoretical, L)) %>%
|
L = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, max_l_theoretical, L)) %>%
|
||||||
ungroup()
|
ungroup()
|
||||||
|
|
||||||
message("Calculating deletion strain(s) interactions scores")
|
message("Calculating deletion strain(s) summary statistics")
|
||||||
df_deletion_stats <- calculate_summary_stats(
|
df_deletion_stats <- calculate_summary_stats(
|
||||||
df = df_deletion,
|
df = df_deletion,
|
||||||
variables = c("L", "K", "r", "AUC"),
|
variables = c("L", "K", "r", "AUC"),
|
||||||
group_vars = c("OrfRep", "Gene", "conc_num")
|
group_vars = c("OrfRep", "Gene", "Drug", "conc_num", "conc_num_factor_factor")
|
||||||
)$df_with_stats
|
)$df_with_stats
|
||||||
deletion_results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, group_vars = c("OrfRep", "Gene"))
|
message("Calculating deletion strain(s) interactions scores")
|
||||||
df_calculations <- deletion_results$calculations
|
results <- calculate_interaction_scores(df_deletion_stats, df_bg_stats, group_vars = c("OrfRep", "Gene", "Drug"))
|
||||||
df_interactions <- deletion_results$interactions
|
df_calculations <- results$calculations
|
||||||
|
df_interactions <- results$interactions
|
||||||
# Writing Z-Scores to file
|
df_interactions_joined <- results$full_data
|
||||||
write.csv(df_calculations_reference, file = file.path(out_dir, "zscore_calculations_reference.csv"), row.names = FALSE)
|
|
||||||
write.csv(df_interactions_reference, file = file.path(out_dir, "zscore_interactions_reference.csv"), row.names = FALSE)
|
|
||||||
write.csv(df_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE)
|
write.csv(df_calculations, file = file.path(out_dir, "zscore_calculations.csv"), row.names = FALSE)
|
||||||
write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE)
|
write.csv(df_interactions, file = file.path(out_dir, "zscore_interactions.csv"), row.names = FALSE)
|
||||||
|
|
||||||
# Create interaction plots
|
# Create interaction plots
|
||||||
message("Generating reference interaction plots")
|
message("Generating reference interaction plots")
|
||||||
reference_plot_configs <- generate_interaction_plot_configs(
|
reference_plot_configs <- generate_interaction_plot_configs(df_interactions_reference_joined, df_bg_stats, "reference")
|
||||||
df_reference_stats, df_calculations_reference, df_interactions_reference, "reference")
|
|
||||||
generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs)
|
generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs)
|
||||||
|
|
||||||
message("Generating deletion interaction plots")
|
message("Generating deletion interaction plots")
|
||||||
deletion_plot_configs <- generate_interaction_plot_configs(
|
deletion_plot_configs <- generate_interaction_plot_configs(df_interactions_joined, df_bg_stats, "deletion")
|
||||||
df_deletion_stats, df_calculations, df_interactions, "deletion")
|
|
||||||
generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs)
|
generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs)
|
||||||
|
|
||||||
# Define conditions for enhancers and suppressors
|
message("Writing enhancer/suppressor csv files")
|
||||||
# TODO Add to study config?
|
interaction_threshold <- 2 # TODO add to study config?
|
||||||
threshold <- 2
|
enhancer_condition_L <- df_interactions$Avg_Zscore_L >= interaction_threshold
|
||||||
enhancer_condition_L <- df_interactions$Avg_Zscore_L >= threshold
|
suppressor_condition_L <- df_interactions$Avg_Zscore_L <= -interaction_threshold
|
||||||
suppressor_condition_L <- df_interactions$Avg_Zscore_L <= -threshold
|
enhancer_condition_K <- df_interactions$Avg_Zscore_K >= interaction_threshold
|
||||||
enhancer_condition_K <- df_interactions$Avg_Zscore_K >= threshold
|
suppressor_condition_K <- df_interactions$Avg_Zscore_K <= -interaction_threshold
|
||||||
suppressor_condition_K <- df_interactions$Avg_Zscore_K <= -threshold
|
|
||||||
|
|
||||||
# Subset data
|
|
||||||
enhancers_L <- df_interactions[enhancer_condition_L, ]
|
enhancers_L <- df_interactions[enhancer_condition_L, ]
|
||||||
suppressors_L <- df_interactions[suppressor_condition_L, ]
|
suppressors_L <- df_interactions[suppressor_condition_L, ]
|
||||||
enhancers_K <- df_interactions[enhancer_condition_K, ]
|
enhancers_K <- df_interactions[enhancer_condition_K, ]
|
||||||
suppressors_K <- df_interactions[suppressor_condition_K, ]
|
suppressors_K <- df_interactions[suppressor_condition_K, ]
|
||||||
|
enhancers_and_suppressors_L <- df_interactions[enhancer_condition_L | suppressor_condition_L, ]
|
||||||
# Save enhancers and suppressors
|
enhancers_and_suppressors_K <- df_interactions[enhancer_condition_K | suppressor_condition_K, ]
|
||||||
message("Writing enhancer/suppressor csv files")
|
|
||||||
write.csv(enhancers_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_L.csv"), row.names = FALSE)
|
write.csv(enhancers_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_L.csv"), row.names = FALSE)
|
||||||
write.csv(suppressors_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_L.csv"), row.names = FALSE)
|
write.csv(suppressors_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_L.csv"), row.names = FALSE)
|
||||||
write.csv(enhancers_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_K.csv"), row.names = FALSE)
|
write.csv(enhancers_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_K.csv"), row.names = FALSE)
|
||||||
write.csv(suppressors_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_K.csv"), row.names = FALSE)
|
write.csv(suppressors_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_K.csv"), row.names = FALSE)
|
||||||
|
|
||||||
# Combine conditions for enhancers and suppressors
|
|
||||||
enhancers_and_suppressors_L <- df_interactions[enhancer_condition_L | suppressor_condition_L, ]
|
|
||||||
enhancers_and_suppressors_K <- df_interactions[enhancer_condition_K | suppressor_condition_K, ]
|
|
||||||
|
|
||||||
# Save combined enhancers and suppressors
|
|
||||||
write.csv(enhancers_and_suppressors_L,
|
write.csv(enhancers_and_suppressors_L,
|
||||||
file = file.path(out_dir, "zscore_interactions_deletion_enhancers_and_suppressors_L.csv"), row.names = FALSE)
|
file = file.path(out_dir, "zscore_interactions_deletion_enhancers_and_suppressors_L.csv"), row.names = FALSE)
|
||||||
write.csv(enhancers_and_suppressors_K,
|
write.csv(enhancers_and_suppressors_K,
|
||||||
file = file.path(out_dir, "zscore_interaction_deletion_enhancers_and_suppressors_K.csv"), row.names = FALSE)
|
file = file.path(out_dir, "zscore_interaction_deletion_enhancers_and_suppressors_K.csv"), row.names = FALSE)
|
||||||
|
|
||||||
# Handle linear model based enhancers and suppressors
|
|
||||||
lm_threshold <- 2 # TODO add to study config?
|
|
||||||
enhancers_lm_L <- df_interactions[df_interactions$Z_lm_L >= lm_threshold, ]
|
|
||||||
suppressors_lm_L <- df_interactions[df_interactions$Z_lm_L <= -lm_threshold, ]
|
|
||||||
enhancers_lm_K <- df_interactions[df_interactions$Z_lm_K >= lm_threshold, ]
|
|
||||||
suppressors_lm_K <- df_interactions[df_interactions$Z_lm_K <= -lm_threshold, ]
|
|
||||||
|
|
||||||
# Save linear model based enhancers and suppressors
|
|
||||||
message("Writing linear model enhancer/suppressor csv files")
|
message("Writing linear model enhancer/suppressor csv files")
|
||||||
write.csv(enhancers_lm_L,
|
lm_interaction_threshold <- 2 # TODO add to study config?
|
||||||
file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_L.csv"), row.names = FALSE)
|
enhancers_lm_L <- df_interactions[df_interactions$Z_lm_L >= lm_interaction_threshold, ]
|
||||||
write.csv(suppressors_lm_L,
|
suppressors_lm_L <- df_interactions[df_interactions$Z_lm_L <= -lm_interaction_threshold, ]
|
||||||
file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_L.csv"), row.names = FALSE)
|
enhancers_lm_K <- df_interactions[df_interactions$Z_lm_K >= lm_interaction_threshold, ]
|
||||||
write.csv(enhancers_lm_K,
|
suppressors_lm_K <- df_interactions[df_interactions$Z_lm_K <= -lm_interaction_threshold, ]
|
||||||
file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_K.csv"), row.names = FALSE)
|
write.csv(enhancers_lm_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_L.csv"), row.names = FALSE)
|
||||||
write.csv(suppressors_lm_K,
|
write.csv(suppressors_lm_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_L.csv"), row.names = FALSE)
|
||||||
file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_K.csv"), row.names = FALSE)
|
write.csv(enhancers_lm_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_K.csv"), row.names = FALSE)
|
||||||
|
write.csv(suppressors_lm_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_K.csv"), row.names = FALSE)
|
||||||
|
|
||||||
message("Generating rank plots")
|
message("Generating rank plots")
|
||||||
rank_plot_configs <- generate_rank_plot_configs(
|
rank_plot_configs <- generate_rank_plot_configs(
|
||||||
@@ -1463,7 +1511,7 @@ main <- function() {
|
|||||||
generate_and_save_plots(out_dir = out_dir, filename = "rank_plots_lm",
|
generate_and_save_plots(out_dir = out_dir, filename = "rank_plots_lm",
|
||||||
plot_configs = rank_lm_plot_configs)
|
plot_configs = rank_lm_plot_configs)
|
||||||
|
|
||||||
overlap_threshold <- 2
|
overlap_threshold <- 2 # TODO add to study config?
|
||||||
df_interactions_filtered <- df_interactions %>%
|
df_interactions_filtered <- df_interactions %>%
|
||||||
filter(!is.na(Z_lm_L) & !is.na(Avg_Zscore_L)) %>%
|
filter(!is.na(Z_lm_L) & !is.na(Avg_Zscore_L)) %>%
|
||||||
mutate(
|
mutate(
|
||||||
|
|||||||
Reference in New Issue
Block a user