Remove existing df calls for single dplyr
This commit is contained in:
@@ -805,6 +805,17 @@ generate_correlation_plot_configs <- function(df, variables) {
|
|||||||
return(configs)
|
return(configs)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
filter_and_print_non_finite <- function(df, vars_to_check, print_vars) {
|
||||||
|
non_finite_rows <- df %>% filter(if_any(all_of(vars_to_check), ~ !is.finite(.)))
|
||||||
|
|
||||||
|
if (nrow(non_finite_rows) > 0) {
|
||||||
|
message("Removing the following non-finite rows:")
|
||||||
|
print(non_finite_rows %>% select(all_of(print_vars)), n = 200)
|
||||||
|
}
|
||||||
|
|
||||||
|
df %>% filter(if_all(all_of(vars_to_check), is.finite))
|
||||||
|
}
|
||||||
|
|
||||||
main <- function() {
|
main <- function() {
|
||||||
lapply(names(args$experiments), function(exp_name) {
|
lapply(names(args$experiments), function(exp_name) {
|
||||||
exp <- args$experiments[[exp_name]]
|
exp <- args$experiments[[exp_name]]
|
||||||
@@ -821,18 +832,14 @@ main <- function() {
|
|||||||
print_vars <- c("OrfRep", "Plate", "scan", "Col", "Row", "num", "OrfRep", "conc_num", "conc_num_factor",
|
print_vars <- c("OrfRep", "Plate", "scan", "Col", "Row", "num", "OrfRep", "conc_num", "conc_num_factor",
|
||||||
"delta_bg_tolerance", "delta_bg", "Gene", "L", "K", "r", "AUC", "NG", "DB")
|
"delta_bg_tolerance", "delta_bg", "Gene", "L", "K", "r", "AUC", "NG", "DB")
|
||||||
|
|
||||||
message("Loading and filtering data")
|
message("Loading and filtering data for experiment: ", exp_name)
|
||||||
df <- load_and_process_data(args$easy_results_file, sd = exp_sd)
|
df <- load_and_process_data(args$easy_results_file, sd = exp_sd) %>%
|
||||||
df <- update_gene_names(df, args$sgd_gene_list)
|
update_gene_names(args$sgd_gene_list) %>%
|
||||||
df <- as_tibble(df)
|
as_tibble()
|
||||||
|
|
||||||
# Filter rows that are above tolerance for quality control plots
|
# Quality Control: Filter rows above tolerance
|
||||||
df_above_tolerance <- df %>% filter(DB == 1)
|
df_above_tolerance <- df %>% filter(DB == 1)
|
||||||
|
|
||||||
# Set L, r, K, AUC (and delta_bg?) to NA for rows that are above tolerance
|
|
||||||
df_na <- df %>% mutate(across(all_of(summary_vars), ~ ifelse(DB == 1, NA, .)))
|
df_na <- df %>% mutate(across(all_of(summary_vars), ~ ifelse(DB == 1, NA, .)))
|
||||||
|
|
||||||
# Remove rows with 0 values in L
|
|
||||||
df_no_zeros <- df_na %>% filter(L > 0)
|
df_no_zeros <- df_na %>% filter(L > 0)
|
||||||
|
|
||||||
# Save some constants
|
# Save some constants
|
||||||
@@ -842,17 +849,8 @@ main <- function() {
|
|||||||
|
|
||||||
message("Calculating summary statistics before quality control")
|
message("Calculating summary statistics before quality control")
|
||||||
ss <- calculate_summary_stats(df, summary_vars, group_vars = group_vars)
|
ss <- calculate_summary_stats(df, summary_vars, group_vars = group_vars)
|
||||||
# df_ss <- ss$summary_stats
|
|
||||||
df_stats <- ss$df_with_stats
|
df_stats <- ss$df_with_stats
|
||||||
df_filtered_stats <- df_stats %>%
|
df_filtered_stats <- filter_and_print_non_finite(df_stats, "L", print_vars)
|
||||||
{
|
|
||||||
non_finite_rows <- filter(., if_any(c(L), ~ !is.finite(.)))
|
|
||||||
if (nrow(non_finite_rows) > 0) {
|
|
||||||
message("Filtering out the following non-finite rows:")
|
|
||||||
print(non_finite_rows %>% select(any_of(print_vars)), n = 200)
|
|
||||||
}
|
|
||||||
filter(., if_all(c(L), is.finite))
|
|
||||||
}
|
|
||||||
|
|
||||||
message("Calculating summary statistics after quality control")
|
message("Calculating summary statistics after quality control")
|
||||||
ss <- calculate_summary_stats(df_na, summary_vars, group_vars = group_vars)
|
ss <- calculate_summary_stats(df_na, summary_vars, group_vars = group_vars)
|
||||||
@@ -860,28 +858,12 @@ main <- function() {
|
|||||||
df_na_stats <- ss$df_with_stats
|
df_na_stats <- ss$df_with_stats
|
||||||
write.csv(df_na_ss, file = file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
|
write.csv(df_na_ss, file = file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
|
||||||
# Filter out non-finite rows for plotting
|
# Filter out non-finite rows for plotting
|
||||||
df_na_filtered_stats <- df_na_stats %>%
|
df_na_filtered_stats <- filter_and_print_non_finite(df_na_stats, "L", print_vars)
|
||||||
{
|
|
||||||
non_finite_rows <- filter(., if_any(c(L), ~ !is.finite(.)))
|
|
||||||
if (nrow(non_finite_rows) > 0) {
|
|
||||||
message("Removed the following non-finite rows:")
|
|
||||||
print(non_finite_rows %>% select(any_of(print_vars)), n = 200)
|
|
||||||
}
|
|
||||||
filter(., if_all(c(L), is.finite))
|
|
||||||
}
|
|
||||||
|
|
||||||
message("Calculating summary statistics after quality control excluding zero values")
|
message("Calculating summary statistics after quality control excluding zero values")
|
||||||
ss <- calculate_summary_stats(df_no_zeros, summary_vars, group_vars = group_vars)
|
ss <- calculate_summary_stats(df_no_zeros, summary_vars, group_vars = group_vars)
|
||||||
df_no_zeros_stats <- ss$df_with_stats
|
df_no_zeros_stats <- ss$df_with_stats
|
||||||
df_no_zeros_filtered_stats <- df_no_zeros_stats %>%
|
df_no_zeros_filtered_stats <- filter_and_print_non_finite(df_no_zeros_stats, "L", print_vars)
|
||||||
{
|
|
||||||
non_finite_rows <- filter(., if_any(c(L), ~ !is.finite(.)))
|
|
||||||
if (nrow(non_finite_rows) > 0) {
|
|
||||||
message("Removed the following non-finite rows:")
|
|
||||||
print(non_finite_rows %>% select(any_of(print_vars)), n = 200)
|
|
||||||
}
|
|
||||||
filter(., if_all(c(L), is.finite))
|
|
||||||
}
|
|
||||||
|
|
||||||
message("Filtering by 2SD of K")
|
message("Filtering by 2SD of K")
|
||||||
df_na_within_2sd_k <- df_na_stats %>%
|
df_na_within_2sd_k <- df_na_stats %>%
|
||||||
@@ -892,17 +874,11 @@ main <- function() {
|
|||||||
message("Calculating summary statistics for L within 2SD of K")
|
message("Calculating summary statistics for L within 2SD of K")
|
||||||
# TODO We're omitting the original z_max calculation, not sure if needed?
|
# TODO We're omitting the original z_max calculation, not sure if needed?
|
||||||
ss <- calculate_summary_stats(df_na_within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
|
ss <- calculate_summary_stats(df_na_within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
|
||||||
l_within_2sd_k_ss <- ss$summary_stats
|
write.csv(ss$summary_stats, file = file.path(out_dir_qc, "max_observed_L_vals_for_spots_within_2sd_K.csv"), row.names = FALSE)
|
||||||
df_na_l_within_2sd_k_stats <- ss$df_with_stats
|
|
||||||
write.csv(l_within_2sd_k_ss,
|
|
||||||
file = file.path(out_dir_qc, "max_observed_L_vals_for_spots_within_2sd_K.csv"), row.names = FALSE)
|
|
||||||
|
|
||||||
message("Calculating summary statistics for L outside 2SD of K")
|
message("Calculating summary statistics for L outside 2SD of K")
|
||||||
ss <- calculate_summary_stats(df_na_outside_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
|
ss <- calculate_summary_stats(df_na_outside_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
|
||||||
l_outside_2sd_k_ss <- ss$summary_stats
|
write.csv(ss$summary_stats, file = file.path(out_dir, "max_observed_L_vals_for_spots_outside_2sd_K.csv"), row.names = FALSE)
|
||||||
df_na_l_outside_2sd_k_stats <- ss$df_with_stats
|
|
||||||
write.csv(l_outside_2sd_k_ss,
|
|
||||||
file = file.path(out_dir, "max_observed_L_vals_for_spots_outside_2sd_K.csv"), row.names = FALSE)
|
|
||||||
|
|
||||||
# Each plots list corresponds to a file
|
# Each plots list corresponds to a file
|
||||||
message("Generating quality control plot configurations")
|
message("Generating quality control plot configurations")
|
||||||
|
|||||||
Reference in New Issue
Block a user