Align df handling closer to original script
Tento commit je obsažen v:
@@ -193,31 +193,13 @@ calculate_summary_stats <- function(df, variables, group_vars) {
|
||||
return(list(summary_stats = summary_stats, df_with_stats = df_joined))
|
||||
}
|
||||
|
||||
calculate_interaction_scores <- function(df, max_conc, variables = c("L", "K", "r", "AUC")) {
|
||||
calculate_interaction_scores <- function(df, max_conc, variables = c("L", "K", "r", "AUC"),
|
||||
group_vars = c("OrfRep", "Gene", "num")) {
|
||||
|
||||
# Calculate total concentration variables
|
||||
total_conc_num <- length(unique(df$conc_num))
|
||||
|
||||
# Pull the background means and standard deviations from zero concentration
|
||||
bg_means <- list(
|
||||
L = df %>% filter(conc_num == 0) %>% pull(mean_L) %>% first(),
|
||||
K = df %>% filter(conc_num == 0) %>% pull(mean_K) %>% first(),
|
||||
r = df %>% filter(conc_num == 0) %>% pull(mean_r) %>% first(),
|
||||
AUC = df %>% filter(conc_num == 0) %>% pull(mean_AUC) %>% first()
|
||||
)
|
||||
|
||||
bg_sd <- list(
|
||||
L = df %>% filter(conc_num == 0) %>% pull(sd_L) %>% first(),
|
||||
K = df %>% filter(conc_num == 0) %>% pull(sd_K) %>% first(),
|
||||
r = df %>% filter(conc_num == 0) %>% pull(sd_r) %>% first(),
|
||||
AUC = df %>% filter(conc_num == 0) %>% pull(sd_AUC) %>% first()
|
||||
)
|
||||
|
||||
calculations <- calculate_summary_stats(
|
||||
df = df,
|
||||
variables = variables,
|
||||
group_vars = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor")
|
||||
)$df_with_stats
|
||||
|
||||
calculations <- calculations %>%
|
||||
group_by(OrfRep, Gene, num) %>%
|
||||
@@ -1064,22 +1046,21 @@ main <- function() {
|
||||
|
||||
# Filter rows above delta background tolerance
|
||||
df_above_tolerance <- df %>% filter(DB == 1)
|
||||
df_na <- df %>% mutate(across(all_of(summary_vars), ~ ifelse(DB == 1, NA, .)))
|
||||
df_no_zeros <- df_na %>% filter(L > 0)
|
||||
df_na <- df %>% mutate(across(all_of(summary_vars), ~ ifelse(DB == 1, NA, .))) # formerly X
|
||||
df_no_zeros <- df_na %>% filter(L > 0) # formerly X_noZero
|
||||
|
||||
# Save some constants
|
||||
max_conc <- max(df$conc_num)
|
||||
max_conc <- max(df$conc_num_factor)
|
||||
l_half_median <- (median(df_above_tolerance$L, na.rm = TRUE)) / 2
|
||||
k_half_median <- (median(df_above_tolerance$K, na.rm = TRUE)) / 2
|
||||
|
||||
message("Calculating summary statistics before quality control")
|
||||
ss <- calculate_summary_stats(
|
||||
df_stats <- calculate_summary_stats(
|
||||
df = df,
|
||||
variables = summary_vars,
|
||||
group_vars = c("conc_num", "conc_num_factor"))
|
||||
df_stats <- ss$df_with_stats
|
||||
group_vars = c("conc_num", "conc_num_factor"))$df_with_stats
|
||||
message("Filtering non-finite data")
|
||||
df_filtered_stats <- process_data(df_stats, c("L"), filter_nf = TRUE)
|
||||
# df_filtered_stats <- process_data(df_stats, c("L"), filter_nf = TRUE)
|
||||
|
||||
message("Calculating summary statistics after quality control")
|
||||
ss <- calculate_summary_stats(
|
||||
@@ -1089,7 +1070,7 @@ main <- function() {
|
||||
df_na_ss <- ss$summary_stats
|
||||
df_na_stats <- ss$df_with_stats
|
||||
write.csv(df_na_ss, file = file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
|
||||
df_na_filtered_stats <- process_data(df_na_stats, c("L"), filter_nf = TRUE)
|
||||
# df_na_filtered_stats <- process_data(df_na_stats, c("L"), filter_nf = TRUE)
|
||||
|
||||
message("Calculating summary statistics after quality control excluding zero values")
|
||||
ss <- calculate_summary_stats(
|
||||
@@ -1097,7 +1078,7 @@ main <- function() {
|
||||
variables = summary_vars,
|
||||
group_vars = c("conc_num", "conc_num_factor"))
|
||||
df_no_zeros_stats <- ss$df_with_stats
|
||||
df_no_zeros_filtered_stats <- process_data(df_no_zeros_stats, c("L"), filter_nf = TRUE)
|
||||
# df_no_zeros_filtered_stats <- process_data(df_no_zeros_stats, c("L"), filter_nf = TRUE)
|
||||
|
||||
message("Filtering by 2SD of K")
|
||||
df_na_within_2sd_k <- df_na_stats %>%
|
||||
@@ -1137,7 +1118,7 @@ main <- function() {
|
||||
|
||||
frequency_delta_bg_plot_configs <- list(
|
||||
list(
|
||||
df = df_filtered_stats,
|
||||
df = df_stats,
|
||||
x_var = "delta_bg",
|
||||
y_var = NULL,
|
||||
plot_type = "density",
|
||||
@@ -1148,7 +1129,7 @@ main <- function() {
|
||||
error_bar = FALSE,
|
||||
legend_position = "right"),
|
||||
list(
|
||||
df = df_filtered_stats,
|
||||
df = df_stats,
|
||||
x_var = "delta_bg",
|
||||
y_var = NULL,
|
||||
plot_type = "bar",
|
||||
@@ -1185,27 +1166,27 @@ main <- function() {
|
||||
|
||||
plate_analysis_plot_configs <- generate_plate_analysis_plot_configs(
|
||||
variables = summary_vars,
|
||||
df_before = df_filtered_stats,
|
||||
df_after = df_na_filtered_stats,
|
||||
df_before = df_stats,
|
||||
df_after = df_na_stats,
|
||||
)
|
||||
|
||||
plate_analysis_boxplot_configs <- generate_plate_analysis_plot_configs(
|
||||
variables = summary_vars,
|
||||
df_before = df_filtered_stats,
|
||||
df_after = df_na_filtered_stats,
|
||||
df_before = df_stats,
|
||||
df_after = df_na_stats,
|
||||
plot_type = "box"
|
||||
)
|
||||
|
||||
plate_analysis_no_zeros_plot_configs <- generate_plate_analysis_plot_configs(
|
||||
variables = summary_vars,
|
||||
stages = c("after"), # Only after QC
|
||||
df_after = df_no_zeros_filtered_stats,
|
||||
df_after = df_no_zeros_stats,
|
||||
)
|
||||
|
||||
plate_analysis_no_zeros_boxplot_configs <- generate_plate_analysis_plot_configs(
|
||||
variables = summary_vars,
|
||||
stages = c("after"), # Only after QC
|
||||
df_after = df_no_zeros_filtered_stats,
|
||||
df_after = df_no_zeros_stats,
|
||||
plot_type = "box"
|
||||
)
|
||||
|
||||
@@ -1291,19 +1272,16 @@ main <- function() {
|
||||
message("Calculating summary statistics for background strain")
|
||||
ss_bg <- calculate_summary_stats(df_bg, summary_vars, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
|
||||
summary_stats_bg <- ss_bg$summary_stats
|
||||
# df_bg_stats <- ss_bg$df_with_stats
|
||||
write.csv(summary_stats_bg,
|
||||
file = file.path(out_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")),
|
||||
row.names = FALSE)
|
||||
|
||||
# Filter reference and deletion strains
|
||||
# Formerly X2_RF (reference strains)
|
||||
df_reference <- df_na_stats %>%
|
||||
df_reference <- df_na_stats %>% # formerly X2_RF
|
||||
filter(OrfRep == strain) %>%
|
||||
mutate(SM = 0)
|
||||
|
||||
# Formerly X2 (deletion strains)
|
||||
df_deletion <- df_na_stats %>%
|
||||
df_deletion <- df_na_stats %>% # formerly X2
|
||||
filter(OrfRep != strain) %>%
|
||||
mutate(SM = 0)
|
||||
|
||||
@@ -1329,17 +1307,25 @@ main <- function() {
|
||||
ungroup()
|
||||
|
||||
message("Calculating reference strain interaction scores")
|
||||
reference_results <- calculate_interaction_scores(reference_strain, max_conc)
|
||||
df_reference_stats <- calculate_summary_stats(
|
||||
df = refrence_strain,
|
||||
variables = interaction_vars,
|
||||
group_vars = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor")
|
||||
)$df_with_stats
|
||||
reference_results <- calculate_interaction_scores(df_reference_stats, max_conc, group_vars = c("OrfRep", "Gene", "num"))
|
||||
zscores_calculations_reference <- reference_results$calculations
|
||||
zscores_interactions_reference <- reference_results$interactions
|
||||
# zscores_calculations_reference_joined <- reference_results$calculations_joined
|
||||
zscores_interactions_reference_joined <- reference_results$interactions_joined
|
||||
|
||||
message("Calculating deletion strain(s) interactions scores")
|
||||
deletion_results <- calculate_interaction_scores(deletion_strains, max_conc)
|
||||
df_deletion_stats <- calculate_summary_stats(
|
||||
df = deletion_strains,
|
||||
variables = interaction_vars,
|
||||
group_vars = c("OrfRep", "Gene", "conc_num", "conc_num_factor")
|
||||
)$df_with_stats
|
||||
deletion_results <- calculate_interaction_scores(df_deletion_stats, max_conc, group_vars = c("OrfRep"))
|
||||
zscores_calculations <- deletion_results$calculations
|
||||
zscores_interactions <- deletion_results$interactions
|
||||
# zscores_calculations_joined <- deletion_results$calculations_joined
|
||||
zscores_interactions_joined <- deletion_results$interactions_joined
|
||||
|
||||
# Writing Z-Scores to file
|
||||
@@ -1358,7 +1344,7 @@ main <- function() {
|
||||
generate_and_save_plots(out_dir, "InteractionPlots", deletion_plot_configs, grid_layout = list(ncol = 4, nrow = 3))
|
||||
|
||||
# Define conditions for enhancers and suppressors
|
||||
# TODO Add to study config file?
|
||||
# TODO Add to study config?
|
||||
threshold <- 2
|
||||
enhancer_condition_L <- zscores_interactions$Avg_Zscore_L >= threshold
|
||||
suppressor_condition_L <- zscores_interactions$Avg_Zscore_L <= -threshold
|
||||
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele