Refactor out calculate_bg_means
This commit is contained in:
@@ -273,16 +273,6 @@ save_plots <- function(file_name, plot_list, output_dir) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
# Calculate background strain mean values
|
|
||||||
calculate_bg_means <- function(df_stats_by_l, df_stats_by_k, df_stats_by_r, df_stats_by_auc) {
|
|
||||||
list(
|
|
||||||
L = df_stats_by_l %>% filter(conc_num_factor == 0) %>% pull(mean_L),
|
|
||||||
K = df_stats_by_k %>% filter(conc_num_factor == 0) %>% pull(mean_K),
|
|
||||||
r = df_stats_by_r %>% filter(conc_num_factor == 0) %>% pull(mean_r),
|
|
||||||
AUC = df_stats_by_auc %>% filter(conc_num_factor == 0) %>% pull(mean_AUC)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Process strains (deletion and reference)
|
# Process strains (deletion and reference)
|
||||||
process_strains <- function(df, l_within_2sd_k, strain) {
|
process_strains <- function(df, l_within_2sd_k, strain) {
|
||||||
df_strains <- data.frame() # Initialize an empty dataframe to store results
|
df_strains <- data.frame() # Initialize an empty dataframe to store results
|
||||||
@@ -308,7 +298,13 @@ process_strains <- function(df, l_within_2sd_k, strain) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
calculate_interaction_scores <- function(df, df_stats_by_l, df_stats_by_k, df_stats_by_r, df_stats_by_auc,
|
calculate_interaction_scores <- function(df, df_stats_by_l, df_stats_by_k, df_stats_by_r, df_stats_by_auc,
|
||||||
background_means, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) {
|
max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) {
|
||||||
|
|
||||||
|
# Calculate background means
|
||||||
|
L_mean_bg <- df %>% filter(conc_num_factor == 0) %>% pull(L_mean)
|
||||||
|
K_mean_bg <- df %>% filter(conc_num_factor == 0) %>% pull(K_mean)
|
||||||
|
r_mean_bg <- df %>% filter(conc_num_factor == 0) %>% pull(r_mean)
|
||||||
|
AUC_mean_bg <- df %>% filter(conc_num_factor == 0) %>% pull(AUC_mean)
|
||||||
|
|
||||||
# Calculate all necessary statistics and shifts in one step
|
# Calculate all necessary statistics and shifts in one step
|
||||||
interaction_scores_all <- df %>%
|
interaction_scores_all <- df %>%
|
||||||
@@ -318,18 +314,18 @@ calculate_interaction_scores <- function(df, df_stats_by_l, df_stats_by_k, df_st
|
|||||||
across(all_of(variables), list(mean = mean, sd = sd), na.rm = TRUE),
|
across(all_of(variables), list(mean = mean, sd = sd), na.rm = TRUE),
|
||||||
NG = sum(NG, na.rm = TRUE),
|
NG = sum(NG, na.rm = TRUE),
|
||||||
SM = sum(SM, na.rm = TRUE),
|
SM = sum(SM, na.rm = TRUE),
|
||||||
raw_shift_l = mean(L, na.rm = TRUE) - background_means$L,
|
raw_shift_l = mean(L, na.rm = TRUE) - L_mean_bg,
|
||||||
raw_shift_k = mean(K, na.rm = TRUE) - background_means$K,
|
raw_shift_k = mean(K, na.rm = TRUE) - K_mean_bg,
|
||||||
raw_shift_r = mean(r, na.rm = TRUE) - background_means$r,
|
raw_shift_r = mean(r, na.rm = TRUE) - r_mean_bg,
|
||||||
raw_shift_auc = mean(AUC, na.rm = TRUE) - background_means$AUC,
|
raw_shift_auc = mean(AUC, na.rm = TRUE) - AUC_mean_bg,
|
||||||
z_shift_l = raw_shift_l / df_stats_by_l$L_sd[1],
|
z_shift_l = raw_shift_l / df_stats_by_l$L_sd[1],
|
||||||
z_shift_k = raw_shift_k / df_stats_by_k$K_sd[1],
|
z_shift_k = raw_shift_k / df_stats_by_k$K_sd[1],
|
||||||
z_shift_r = raw_shift_r / df_stats_by_r$r_sd[1],
|
z_shift_r = raw_shift_r / df_stats_by_r$r_sd[1],
|
||||||
z_shift_auc = raw_shift_auc / df_stats_by_auc$AUC_sd[1],
|
z_shift_auc = raw_shift_auc / df_stats_by_auc$AUC_sd[1],
|
||||||
exp_l = background_means$L + raw_shift_l,
|
exp_l = L_mean_bg + raw_shift_l,
|
||||||
exp_k = background_means$K + raw_shift_k,
|
exp_k = K_mean_bg + raw_shift_k,
|
||||||
exp_r = background_means$r + raw_shift_r,
|
exp_r = r_mean_bg + raw_shift_r,
|
||||||
exp_auc = background_means$AUC + raw_shift_auc,
|
exp_auc = AUC_mean_bg + raw_shift_auc,
|
||||||
delta_l = mean(L, na.rm = TRUE) - exp_l,
|
delta_l = mean(L, na.rm = TRUE) - exp_l,
|
||||||
delta_k = mean(K, na.rm = TRUE) - exp_k,
|
delta_k = mean(K, na.rm = TRUE) - exp_k,
|
||||||
delta_r = mean(r, na.rm = TRUE) - exp_r,
|
delta_r = mean(r, na.rm = TRUE) - exp_r,
|
||||||
@@ -583,15 +579,20 @@ main <- function() {
|
|||||||
stats_by_r <- stats_joined %>% select(starts_with("r_"), "OrfRep", "conc_num", "conc_num_factor")
|
stats_by_r <- stats_joined %>% select(starts_with("r_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||||
stats_by_auc <- stats_joined %>% select(starts_with("AUC_"), "OrfRep", "conc_num", "conc_num_factor")
|
stats_by_auc <- stats_joined %>% select(starts_with("AUC_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||||
|
|
||||||
stats_by_k_joined <- left_join(df_na, stats_by_k, by = c("conc_num_factor")) # TODO may need to add OrfRep, conc_num, etc
|
# Originally this filtered L NA's
|
||||||
|
# I've removed that filtering for now since it didn't seem right but may need to add it back in later
|
||||||
|
# str(stats_by_k)
|
||||||
|
stats_by_k_joined <- left_join(df_na, stats_by_k, by = c("conc_num", "conc_num_factor"))
|
||||||
|
|
||||||
|
str(stats_by_k_joined)
|
||||||
|
|
||||||
# Filter data within 2SD
|
# Filter data within 2SD
|
||||||
within_2sd_k <- stats_by_k_joined %>%
|
within_2sd_k <- stats_by_k %>%
|
||||||
filter(K >= (mean_K - 2 * sd_K) & K <= (mean_K + 2 * sd_K))
|
filter(K >= (K_mean - 2 * K_sd) & K <= (K_mean + 2 * K_sd))
|
||||||
|
|
||||||
# Filter data outside 2SD
|
# Filter data outside 2SD
|
||||||
outside_2sd_k <- stats_by_k_joined %>%
|
outside_2sd_k <- stats_by_k %>%
|
||||||
filter(K < (mean_K - 2 * sd_K) | K > (mean_K + 2 * sd_K))
|
filter(K < (K_mean - 2 * K_sd) | K > (K_mean + 2 * K_sd))
|
||||||
|
|
||||||
# Calculate summary statistics for L within and outside 2SD of K
|
# Calculate summary statistics for L within and outside 2SD of K
|
||||||
message("Calculating summary statistics for L within 2SD of K")
|
message("Calculating summary statistics for L within 2SD of K")
|
||||||
@@ -606,8 +607,8 @@ main <- function() {
|
|||||||
row.names = FALSE)
|
row.names = FALSE)
|
||||||
|
|
||||||
# Process background strains
|
# Process background strains
|
||||||
background_strains <- c("YDL227C")
|
bg_strains <- c("YDL227C")
|
||||||
lapply(background_strains, function(strain) {
|
lapply(bg_strains, function(strain) {
|
||||||
|
|
||||||
message("Processing background strain: ", strain)
|
message("Processing background strain: ", strain)
|
||||||
|
|
||||||
@@ -635,9 +636,6 @@ main <- function() {
|
|||||||
row.names = FALSE)
|
row.names = FALSE)
|
||||||
stats_bg_joined <- left_join(df_bg, stats_bg, by = c("OrfRep", "Gene", "conc_num", "conc_num_factor"))
|
stats_bg_joined <- left_join(df_bg, stats_bg, by = c("OrfRep", "Gene", "conc_num", "conc_num_factor"))
|
||||||
|
|
||||||
message("Calculating background means")
|
|
||||||
background_means <- calculate_bg_means(stats_by_l_bg, stats_by_k_bg, stats_by_r_bg, stats_by_auc_bg)
|
|
||||||
|
|
||||||
# Filter reference and deletion strains
|
# Filter reference and deletion strains
|
||||||
# Formerly X2_RF (reference strain)
|
# Formerly X2_RF (reference strain)
|
||||||
df_reference <- df_na %>%
|
df_reference <- df_na %>%
|
||||||
@@ -662,15 +660,17 @@ main <- function() {
|
|||||||
# This is synonymous with the legacy OrfRep mutation
|
# This is synonymous with the legacy OrfRep mutation
|
||||||
# Use group_by in functions in lieu of mutating OrfRep
|
# Use group_by in functions in lieu of mutating OrfRep
|
||||||
# default_group_vars <- c("OrfRep", "Gene", "num")
|
# default_group_vars <- c("OrfRep", "Gene", "num")
|
||||||
|
|
||||||
|
# TODO we may need to add "num" to grouping vars
|
||||||
|
|
||||||
# Calculate interactions
|
# Calculate interactions
|
||||||
variables <- c("L", "K", "r", "AUC")
|
variables <- c("L", "K", "r", "AUC")
|
||||||
message("Calculating reference interaction scores")
|
message("Calculating reference interaction scores")
|
||||||
reference_results <- calculate_interaction_scores(reference_strain, stats_by_l,
|
reference_results <- calculate_interaction_scores(reference_strain, stats_by_l,
|
||||||
stats_by_k, stats_by_r, stats_by_auc, background_means, max_conc, variables)
|
stats_by_k, stats_by_r, stats_by_auc, max_conc, variables)
|
||||||
message("Calculating deletion interaction scores")
|
message("Calculating deletion interaction scores")
|
||||||
deletion_results <- calculate_interaction_scores(deletion_strains, stats_by_l,
|
deletion_results <- calculate_interaction_scores(deletion_strains, stats_by_l,
|
||||||
stats_by_k, stats_by_r, stats_by_auc, background_means, max_conc, variables)
|
stats_by_k, stats_by_r, stats_by_auc, max_conc, variables)
|
||||||
|
|
||||||
zscores_calculations_reference <- reference_results$zscores_calculations
|
zscores_calculations_reference <- reference_results$zscores_calculations
|
||||||
zscores_interactions_reference <- reference_results$zscores_interactions
|
zscores_interactions_reference <- reference_results$zscores_interactions
|
||||||
|
|||||||
Reference in New Issue
Block a user