Align df handling closer to original script

Tento commit je obsažen v:
2024-09-24 22:17:09 -04:00
rodič eaf75f8892
revize 4741996694

Zobrazit soubor

@@ -193,31 +193,13 @@ calculate_summary_stats <- function(df, variables, group_vars) {
return(list(summary_stats = summary_stats, df_with_stats = df_joined))
}
calculate_interaction_scores <- function(df, max_conc, variables = c("L", "K", "r", "AUC")) {
calculate_interaction_scores <- function(df, max_conc, variables = c("L", "K", "r", "AUC"),
group_vars = c("OrfRep", "Gene", "num")) {
# Calculate total concentration variables
total_conc_num <- length(unique(df$conc_num))
# Pull the background means and standard deviations from zero concentration
bg_means <- list(
L = df %>% filter(conc_num == 0) %>% pull(mean_L) %>% first(),
K = df %>% filter(conc_num == 0) %>% pull(mean_K) %>% first(),
r = df %>% filter(conc_num == 0) %>% pull(mean_r) %>% first(),
AUC = df %>% filter(conc_num == 0) %>% pull(mean_AUC) %>% first()
)
bg_sd <- list(
L = df %>% filter(conc_num == 0) %>% pull(sd_L) %>% first(),
K = df %>% filter(conc_num == 0) %>% pull(sd_K) %>% first(),
r = df %>% filter(conc_num == 0) %>% pull(sd_r) %>% first(),
AUC = df %>% filter(conc_num == 0) %>% pull(sd_AUC) %>% first()
)
calculations <- calculate_summary_stats(
df = df,
variables = variables,
group_vars = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor")
)$df_with_stats
calculations <- calculations %>%
group_by(OrfRep, Gene, num) %>%
@@ -1064,22 +1046,21 @@ main <- function() {
# Filter rows above delta background tolerance
df_above_tolerance <- df %>% filter(DB == 1)
df_na <- df %>% mutate(across(all_of(summary_vars), ~ ifelse(DB == 1, NA, .)))
df_no_zeros <- df_na %>% filter(L > 0)
df_na <- df %>% mutate(across(all_of(summary_vars), ~ ifelse(DB == 1, NA, .))) # formerly X
df_no_zeros <- df_na %>% filter(L > 0) # formerly X_noZero
# Save some constants
max_conc <- max(df$conc_num)
max_conc <- max(df$conc_num_factor)
l_half_median <- (median(df_above_tolerance$L, na.rm = TRUE)) / 2
k_half_median <- (median(df_above_tolerance$K, na.rm = TRUE)) / 2
message("Calculating summary statistics before quality control")
ss <- calculate_summary_stats(
df_stats <- calculate_summary_stats(
df = df,
variables = summary_vars,
group_vars = c("conc_num", "conc_num_factor"))
df_stats <- ss$df_with_stats
group_vars = c("conc_num", "conc_num_factor"))$df_with_stats
message("Filtering non-finite data")
df_filtered_stats <- process_data(df_stats, c("L"), filter_nf = TRUE)
# df_filtered_stats <- process_data(df_stats, c("L"), filter_nf = TRUE)
message("Calculating summary statistics after quality control")
ss <- calculate_summary_stats(
@@ -1089,7 +1070,7 @@ main <- function() {
df_na_ss <- ss$summary_stats
df_na_stats <- ss$df_with_stats
write.csv(df_na_ss, file = file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
df_na_filtered_stats <- process_data(df_na_stats, c("L"), filter_nf = TRUE)
# df_na_filtered_stats <- process_data(df_na_stats, c("L"), filter_nf = TRUE)
message("Calculating summary statistics after quality control excluding zero values")
ss <- calculate_summary_stats(
@@ -1097,7 +1078,7 @@ main <- function() {
variables = summary_vars,
group_vars = c("conc_num", "conc_num_factor"))
df_no_zeros_stats <- ss$df_with_stats
df_no_zeros_filtered_stats <- process_data(df_no_zeros_stats, c("L"), filter_nf = TRUE)
# df_no_zeros_filtered_stats <- process_data(df_no_zeros_stats, c("L"), filter_nf = TRUE)
message("Filtering by 2SD of K")
df_na_within_2sd_k <- df_na_stats %>%
@@ -1137,7 +1118,7 @@ main <- function() {
frequency_delta_bg_plot_configs <- list(
list(
df = df_filtered_stats,
df = df_stats,
x_var = "delta_bg",
y_var = NULL,
plot_type = "density",
@@ -1148,7 +1129,7 @@ main <- function() {
error_bar = FALSE,
legend_position = "right"),
list(
df = df_filtered_stats,
df = df_stats,
x_var = "delta_bg",
y_var = NULL,
plot_type = "bar",
@@ -1185,27 +1166,27 @@ main <- function() {
plate_analysis_plot_configs <- generate_plate_analysis_plot_configs(
variables = summary_vars,
df_before = df_filtered_stats,
df_after = df_na_filtered_stats,
df_before = df_stats,
df_after = df_na_stats,
)
plate_analysis_boxplot_configs <- generate_plate_analysis_plot_configs(
variables = summary_vars,
df_before = df_filtered_stats,
df_after = df_na_filtered_stats,
df_before = df_stats,
df_after = df_na_stats,
plot_type = "box"
)
plate_analysis_no_zeros_plot_configs <- generate_plate_analysis_plot_configs(
variables = summary_vars,
stages = c("after"), # Only after QC
df_after = df_no_zeros_filtered_stats,
df_after = df_no_zeros_stats,
)
plate_analysis_no_zeros_boxplot_configs <- generate_plate_analysis_plot_configs(
variables = summary_vars,
stages = c("after"), # Only after QC
df_after = df_no_zeros_filtered_stats,
df_after = df_no_zeros_stats,
plot_type = "box"
)
@@ -1291,19 +1272,16 @@ main <- function() {
message("Calculating summary statistics for background strain")
ss_bg <- calculate_summary_stats(df_bg, summary_vars, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
summary_stats_bg <- ss_bg$summary_stats
# df_bg_stats <- ss_bg$df_with_stats
write.csv(summary_stats_bg,
file = file.path(out_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")),
row.names = FALSE)
# Filter reference and deletion strains
# Formerly X2_RF (reference strains)
df_reference <- df_na_stats %>%
df_reference <- df_na_stats %>% # formerly X2_RF
filter(OrfRep == strain) %>%
mutate(SM = 0)
# Formerly X2 (deletion strains)
df_deletion <- df_na_stats %>%
df_deletion <- df_na_stats %>% # formerly X2
filter(OrfRep != strain) %>%
mutate(SM = 0)
@@ -1329,17 +1307,25 @@ main <- function() {
ungroup()
message("Calculating reference strain interaction scores")
reference_results <- calculate_interaction_scores(reference_strain, max_conc)
df_reference_stats <- calculate_summary_stats(
df = refrence_strain,
variables = interaction_vars,
group_vars = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor")
)$df_with_stats
reference_results <- calculate_interaction_scores(df_reference_stats, max_conc, group_vars = c("OrfRep", "Gene", "num"))
zscores_calculations_reference <- reference_results$calculations
zscores_interactions_reference <- reference_results$interactions
# zscores_calculations_reference_joined <- reference_results$calculations_joined
zscores_interactions_reference_joined <- reference_results$interactions_joined
message("Calculating deletion strain(s) interactions scores")
deletion_results <- calculate_interaction_scores(deletion_strains, max_conc)
df_deletion_stats <- calculate_summary_stats(
df = deletion_strains,
variables = interaction_vars,
group_vars = c("OrfRep", "Gene", "conc_num", "conc_num_factor")
)$df_with_stats
deletion_results <- calculate_interaction_scores(df_deletion_stats, max_conc, group_vars = c("OrfRep"))
zscores_calculations <- deletion_results$calculations
zscores_interactions <- deletion_results$interactions
# zscores_calculations_joined <- deletion_results$calculations_joined
zscores_interactions_joined <- deletion_results$interactions_joined
# Writing Z-Scores to file
@@ -1358,7 +1344,7 @@ main <- function() {
generate_and_save_plots(out_dir, "InteractionPlots", deletion_plot_configs, grid_layout = list(ncol = 4, nrow = 3))
# Define conditions for enhancers and suppressors
# TODO Add to study config file?
# TODO Add to study config?
threshold <- 2
enhancer_condition_L <- zscores_interactions$Avg_Zscore_L >= threshold
suppressor_condition_L <- zscores_interactions$Avg_Zscore_L <= -threshold