Use a single N value per group (count of non-NA L), computed inside reframe()

2024-09-05 20:33:30 -04:00
parent fa54503a5a
commit de599ed5b8
1 changed files with 16 additions and 12 deletions
--- a/workflow/apps/r/calculate_interaction_zscores5.R
+++ b/workflow/apps/r/calculate_interaction_zscores5.R
@@ -183,9 +183,12 @@ process_strains <- function(df) {

 # Calculate summary statistics for all variables
 calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "conc_num_factor")) {
+  # Calculate summary statistics, including a single N based on L
  summary_stats <- df %>%
    group_by(across(all_of(group_vars))) %>%
-    reframe(across(all_of(variables), list(
+    reframe(
+      N = sum(!is.na(L)),  # Single N based on L
+      across(all_of(variables), list(
        mean = ~mean(., na.rm = TRUE),
        median = ~median(., na.rm = TRUE),
        max = ~ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)),
@@ -195,8 +198,8 @@ calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "c
        # TODO: not in original stats but better to do here than in calculate_interactions?
        # z_max = ~ifelse(sd(., na.rm = TRUE) == 0 | all(is.na(.)), NA,
        #   (max(., na.rm = TRUE) - mean(., na.rm = TRUE)) / sd(., na.rm = TRUE))  # Z-score
-    ), .names = "{.fn}_{.col}")) %>%
-    mutate(N = sum(!is.na(L))) # count of non-NA L values
+      ), .names = "{.fn}_{.col}")
+    )

  # Join the summary stats back to the original dataframe
  df_with_stats <- left_join(df, summary_stats, by = group_vars)
@@ -205,6 +208,7 @@ calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "c
  return(list(summary_stats = summary_stats, df_with_stats = df_with_stats))
 }

+
 # Calculate interaction scores
 calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) {