Left join summary stats df with input df
This commit is contained in:
@@ -228,15 +228,12 @@ generate_and_save_plots <- function(df, output_dir, prefix, variables, include_q
|
||||
save_plots(prefix, plots, output_dir)
|
||||
}
|
||||
|
||||
# Calculate summary statistics for all variables together
|
||||
# Calculate summary statistics for all variables
|
||||
calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "conc_num_factor")) {
|
||||
# Calculate summary statistics with the grouping columns
|
||||
summary_stats <- df %>%
|
||||
group_by(across(all_of(group_vars))) %>%
|
||||
summarise(across(all_of(variables), list(
|
||||
N = ~{
|
||||
message("Calculating summary statistics for ", cur_column())
|
||||
n()
|
||||
},
|
||||
mean = ~mean(.x, na.rm = TRUE),
|
||||
median = ~median(.x, na.rm = TRUE),
|
||||
max = ~max(.x, na.rm = TRUE),
|
||||
@@ -299,7 +296,7 @@ save_plots <- function(file_name, plot_list, output_dir) {
|
||||
|
||||
|
||||
# Calculate background strain mean values
|
||||
calculate_background_means <- function(df_stats_by_l, df_stats_by_k, df_stats_by_r, df_stats_by_auc) {
|
||||
calculate_bg_means <- function(df_stats_by_l, df_stats_by_k, df_stats_by_r, df_stats_by_auc) {
|
||||
list(
|
||||
L = df_stats_by_l %>% filter(conc_num_factor == 0) %>% pull(mean_L),
|
||||
K = df_stats_by_k %>% filter(conc_num_factor == 0) %>% pull(mean_K),
|
||||
@@ -574,12 +571,20 @@ main <- function() {
|
||||
)
|
||||
df_no_zeros <- df_na %>% filter(L > 0)
|
||||
|
||||
# Flag and remove non-finite data, printing affected rows
|
||||
df_na_filtered <- df_na %>%
|
||||
filter(if_any(c(L, r, AUC, K), ~ !is.finite(.))) %>%
|
||||
{
|
||||
if (nrow(.) > 0) message("Removing non-finite rows:\n", print(.))
|
||||
df_na %>% filter(if_all(c(L, r, AUC, K), is.finite))
|
||||
}
|
||||
|
||||
# Generate QC PDFs and HTMLs
|
||||
message("Generating QC plots")
|
||||
variables <- c("L", "K", "r", "AUC", "delta_bg")
|
||||
generate_and_save_plots(df, out_dir_qc, "Before_QC", variables, include_qc = TRUE)
|
||||
generate_and_save_plots(df_above_tolerance, out_dir_qc, "Raw_L_vs_K_above_delta_bg_threshold", variables, include_qc = TRUE)
|
||||
generate_and_save_plots(df_na, out_dir_qc, "After_QC", variables)
|
||||
generate_and_save_plots(df_na_filtered, out_dir_qc, "After_QC", variables)
|
||||
generate_and_save_plots(df_no_zeros, out_dir_qc, "No_Zeros", variables)
|
||||
|
||||
rm(df, df_above_tolerance, df_no_zeros)
|
||||
@@ -587,14 +592,14 @@ main <- function() {
|
||||
# Calculate summary statistics
|
||||
message("Calculating summary statistics for all strains")
|
||||
variables <- c("L", "K", "r", "AUC")
|
||||
summary_stats <- calculate_summary_stats(df_na, variables, group_vars = c("conc_num", "conc_num_factor"))
|
||||
write.csv(summary_stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE)
|
||||
stats <- calculate_summary_stats(df_na, variables, group_vars = c("conc_num", "conc_num_factor"))
|
||||
write.csv(stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE)
|
||||
stats_joined <- left_join(df_na, stats, by = c("conc_num", "conc_num_factor"))
|
||||
|
||||
# Calculate the pre-background stats once
|
||||
stats_by_l <- summary_stats %>% select(starts_with("L_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||
stats_by_k <- summary_stats %>% select(starts_with("K_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||
stats_by_r <- summary_stats %>% select(starts_with("r_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||
stats_by_auc <- summary_stats %>% select(starts_with("AUC_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||
stats_by_l <- stats_joined %>% select(starts_with("L_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||
stats_by_k <- stats_joined %>% select(starts_with("K_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||
stats_by_r <- stats_joined %>% select(starts_with("r_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||
stats_by_auc <- stats_joined %>% select(starts_with("AUC_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||
|
||||
# Process background strains
|
||||
background_strains <- c("YDL227C")
|
||||
@@ -604,7 +609,7 @@ main <- function() {
|
||||
|
||||
# Handle missing data by setting zero values to NA
|
||||
# and then removing any rows with NA in L col
|
||||
df_background <- df_na %>%
|
||||
df_bg <- df_na %>%
|
||||
filter(OrfRep == strain) %>%
|
||||
mutate(
|
||||
L = if_else(L == 0, NA, L),
|
||||
@@ -616,17 +621,18 @@ main <- function() {
|
||||
|
||||
# Recalculate summary statistics for the background strain
|
||||
message("Calculating summary statistics for background strain")
|
||||
stats_background <- calculate_summary_stats(df_background, variables, group_vars = c("OrfRep", "Gene", "conc_num", "conc_num_factor"))
|
||||
stats_by_l_background <- stats_background %>% select(starts_with("L_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||
stats_by_k_background <- stats_background %>% select(starts_with("K_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||
stats_by_r_background <- stats_background %>% select(starts_with("r_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||
stats_by_auc_background <- stats_background %>% select(starts_with("AUC_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||
write.csv(stats_background,
|
||||
stats_bg <- calculate_summary_stats(df_bg, variables, group_vars = c("OrfRep", "Gene", "conc_num", "conc_num_factor"))
|
||||
stats_by_l_bg <- stats_bg %>% select(starts_with("L_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||
stats_by_k_bg <- stats_bg %>% select(starts_with("K_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||
stats_by_r_bg <- stats_bg %>% select(starts_with("r_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||
stats_by_auc_bg <- stats_bg %>% select(starts_with("AUC_"), "OrfRep", "conc_num", "conc_num_factor")
|
||||
write.csv(stats_bg,
|
||||
file = file.path(output_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")),
|
||||
row.names = FALSE)
|
||||
stats_bg_joined <- left_join(df_bg, stats_bg, by = c("conc_num", "conc_num_factor"))
|
||||
|
||||
# Filter L values within and outside 2SD of K
|
||||
results_2sd <- calculate_l_2sd_of_k(df_background, stats_by_k_background)
|
||||
results_2sd <- calculate_l_2sd_of_k(df_bg, stats_by_k_bg)
|
||||
within_2sd_k <- results_2sd$within_2sd_k
|
||||
outside_2sd_k <- results_2sd$outside_2sd_k
|
||||
|
||||
@@ -646,8 +652,7 @@ main <- function() {
|
||||
generate_and_save_plots(outside_2sd_k, out_dir, "Raw_L_vs_K_for_strains_outside_2sd_k")
|
||||
|
||||
message("Calculating background means")
|
||||
background_means <- calculate_background_means(stats_by_l_background,
|
||||
stats_by_k_background, stats_by_r_background, stats_by_auc_background)
|
||||
background_means <- calculate_bg_means(stats_by_l_bg, stats_by_k_bg, stats_by_r_bg, stats_by_auc_bg)
|
||||
|
||||
# Filter reference and deletion strains
|
||||
# Formerly X2_RF (reference strain)
|
||||
|
||||
Reference in New Issue
Block a user