From 5d0d017a67ed7d8b11e01d8c7586f704361b188e Mon Sep 17 00:00:00 2001 From: Bryan Roessler Date: Tue, 17 Sep 2024 21:03:19 -0400 Subject: [PATCH] Cleanup output --- .../apps/r/calculate_interaction_zscores.R | 32 ++++++++----------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 1d5b857b..2e193437 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -168,10 +168,10 @@ calculate_summary_stats <- function(df, variables, group_vars) { across(all_of(variables), list( mean = ~mean(., na.rm = TRUE), median = ~median(., na.rm = TRUE), - max = ~max(., na.rm = TRUE), - min = ~min(., na.rm = TRUE), + max = ~ ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)), + min = ~ ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)), sd = ~sd(., na.rm = TRUE), - se = ~sd(., na.rm = TRUE) / sqrt(N) # Corrected SE calculation + se = ~sd(., na.rm = TRUE) / sqrt(N) - 1 # TODO needs comment for explanation ), .names = "{.fn}_{.col}"), .groups = "drop" ) @@ -331,7 +331,7 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars) { # Declare column order for output calculations <- stats %>% select( - "OrfRep", "Gene", "conc_num", "conc_num_factor", "N", + "OrfRep", "Gene", "num", "conc_num", "conc_num_factor", "N", "mean_L", "mean_K", "mean_r", "mean_AUC", "median_L", "median_K", "median_r", "median_AUC", "sd_L", "sd_K", "sd_r", "sd_AUC", @@ -932,7 +932,7 @@ filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FAL # Adjust NAs to .001 for linear model if (adjust) { - if (verbose) message("Replacing NA with 0.001 for Avg_Zscore_ and Z_lm_ columns.") + if (verbose) message("Replacing NA with 0.001 for Avg_Zscore_ and Z_lm_ columns") df <- df %>% mutate( across(all_of(avg_zscore_cols), ~ ifelse(is.na(.), 0.001, .)), @@ -942,14 +942,12 @@ filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FAL # Filter non-finite values if (nf) { - if (verbose) message("Filtering non-finite values for variables: ", paste(variables, collapse = ", ")) - non_finite_df <- df %>% filter(if_any(all_of(variables), ~ !is.finite(.))) if (verbose && nrow(non_finite_df) > 0) { - message("Filtering non-finite rows for variables ", paste(variables, collapse = ", "), ":") - print(non_finite_df) + message("Filtering non-finite rows for variable(s) ", paste(variables, collapse = ", "), ":") + print(non_finite_df %>% select(all_of(c("scan", "Plate", "Row", "Col", "num", "conc_num", variables))), n = 30) } df <- df %>% @@ -958,18 +956,15 @@ filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FAL # Filter missing values if (missing) { - if (verbose) message("Filtering missing values for variables: ", paste(variables, collapse = ", ")) - # Identify missing rows for logging missing_df <- df %>% filter(if_any(all_of(variables), ~ is.na(.))) if (verbose && nrow(missing_df) > 0) { - message("Missing data for variables ", paste(variables, collapse = ", "), ":") - print(missing_df) + message("Filtering missing data for variable(s) ", paste(variables, collapse = ", "), ":") + print(missing_df %>% select(all_of(c("scan", "Plate", "Row", "Col", "num", "conc_num", variables))), n = 30) } - # Keep only rows where all specified variables are not missing df <- df %>% filter(if_all(all_of(variables), ~ !is.na(.))) } @@ -980,14 +975,13 @@ filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FAL if (variable %in% variables) { ylim_vals <- limits_map[[variable]] - if (verbose) message("Applying limits for variable ", variable, ": [", ylim_vals[1], ", ", ylim_vals[2], "].") - out_of_range_df <- df %>% filter(.data[[variable]] < ylim_vals[1] | .data[[variable]] > ylim_vals[2]) if (verbose && nrow(out_of_range_df) > 0) { - message("Out-of-range data for variable ", variable, ":") - print(out_of_range_df) + message("Applying limits for variable ", variable, ": [", ylim_vals[1], ", ", ylim_vals[2], "].") + message("Filtering out-of-range data for variable ", variable, ":") + print(out_of_range_df %>% select(all_of(c("scan", "Plate", "Row", "Col", "num", "conc_num", variables))), n = 30) } df <- df %>% @@ -998,7 +992,7 @@ filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FAL # Calculate Rank Columns if 'rank' is TRUE if (rank) { - if (verbose) message("Calculating rank columns for variables: ", paste(variables, collapse = ", ")) + if (verbose) message("Calculating ranks for variable(s): ", paste(variables, collapse = ", ")) for (col in avg_zscore_cols) { rank_col <- paste0("Rank_", col)