|
@@ -579,7 +579,7 @@ generate_interaction_plot_configs <- function(df, variables) {
|
|
|
AUC = c(-6500, 6500)
|
|
|
)
|
|
|
|
|
|
- df_filtered <- filter_data_for_plots(df, variables, missing = TRUE, limits_map)
|
|
|
+ df_filtered <- filter_data(df, variables, missing = TRUE, limits_map = limits_map) # pass by name: filter_data()'s 3rd formal is now `nf`
|
|
|
|
|
|
# Define annotation label functions
|
|
|
generate_annotation_labels <- function(df, var, annotation_name) {
|
|
@@ -672,7 +672,7 @@ generate_interaction_plot_configs <- function(df, variables) {
|
|
|
|
|
|
generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FALSE) {
|
|
|
|
|
|
- df_filtered <- filter_data_for_plots(df, variables, missing = TRUE)
|
|
|
+ df_filtered <- filter_data(df, variables, missing = TRUE)
|
|
|
|
|
|
for (var in variables) {
|
|
|
avg_zscore_col <- paste0("Avg_Zscore_", var)
|
|
@@ -776,22 +776,20 @@ generate_correlation_plot_configs <- function(df, variables) {
|
|
|
return(configs)
|
|
|
}
|
|
|
|
|
|
-filter_and_print_non_finite <- function(df, vars_to_check, print_vars) {
|
|
|
- non_finite_rows <- df %>% filter(if_any(all_of(vars_to_check), ~ !is.finite(.)))
|
|
|
-
|
|
|
- if (nrow(non_finite_rows) > 0) {
|
|
|
- message("Filtering non-finite rows:")
|
|
|
- print(non_finite_rows %>% select(all_of(print_vars)), n = 200)
|
|
|
- }
|
|
|
-
|
|
|
- df %>% filter(if_all(all_of(vars_to_check), is.finite))
|
|
|
-}
|
|
|
-
|
|
|
-filter_data_for_plots <- function(df, variables, missing = FALSE, limits_map = NULL) {
|
|
|
+filter_data <- function(df, variables, nf = FALSE, missing = FALSE, limits_map = NULL) {
|
|
|
|
|
|
# Loop through each variable to filter and print missing/out-of-range data
|
|
|
for (variable in variables) {
|
|
|
y_var_sym <- sym(variable)
|
|
|
+
|
|
|
+ if (nf) {
|
|
|
+ non_finite <- df %>% filter(!is.finite(!!y_var_sym))
|
|
|
+ if (nrow(non_finite) > 0) {
|
|
|
+ message("Non-finite rows for variable ", variable, ":")
|
|
|
+ print(non_finite)
|
|
|
+ }
|
|
|
+ df <- df %>% filter(is.finite(!!y_var_sym))
|
|
|
+ }
|
|
|
|
|
|
# Filter missing data
|
|
|
if (missing) {
|
|
@@ -844,7 +842,7 @@ main <- function() {
|
|
|
update_gene_names(args$sgd_gene_list) %>%
|
|
|
as_tibble()
|
|
|
|
|
|
- # Quality Control: Filter rows above tolerance
|
|
|
+ # Filter rows above delta background tolerance
|
|
|
df_above_tolerance <- df %>% filter(DB == 1)
|
|
|
df_na <- df %>% mutate(across(all_of(summary_vars), ~ ifelse(DB == 1, NA, .)))
|
|
|
df_no_zeros <- df_na %>% filter(L > 0)
|
|
@@ -857,20 +855,20 @@ main <- function() {
|
|
|
message("Calculating summary statistics before quality control")
|
|
|
ss <- calculate_summary_stats(df, summary_vars, group_vars = group_vars)
|
|
|
df_stats <- ss$df_with_stats
|
|
|
- df_filtered_stats <- filter_and_print_non_finite(df_stats, "L", print_vars)
|
|
|
+ message("Filtering non-finite data")
|
|
|
+ df_filtered_stats <- filter_data(df_stats, c("L"), nf = TRUE)
|
|
|
|
|
|
message("Calculating summary statistics after quality control")
|
|
|
ss <- calculate_summary_stats(df_na, summary_vars, group_vars = group_vars)
|
|
|
df_na_ss <- ss$summary_stats
|
|
|
df_na_stats <- ss$df_with_stats
|
|
|
write.csv(df_na_ss, file = file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
|
|
|
- # Filter out non-finite rows for plotting
|
|
|
- df_na_filtered_stats <- filter_and_print_non_finite(df_na_stats, "L", print_vars)
|
|
|
+ df_na_filtered_stats <- filter_data(df_na_stats, c("L"), nf = TRUE)
|
|
|
|
|
|
message("Calculating summary statistics after quality control excluding zero values")
|
|
|
ss <- calculate_summary_stats(df_no_zeros, summary_vars, group_vars = group_vars)
|
|
|
df_no_zeros_stats <- ss$df_with_stats
|
|
|
- df_no_zeros_filtered_stats <- filter_and_print_non_finite(df_no_zeros_stats, "L", print_vars)
|
|
|
+ df_no_zeros_filtered_stats <- filter_data(df_no_zeros_stats, c("L"), nf = TRUE)
|
|
|
|
|
|
message("Filtering by 2SD of K")
|
|
|
df_na_within_2sd_k <- df_na_stats %>%
|