diff --git a/workflow/apps/r/calculate_interaction_zscores5.R b/workflow/apps/r/calculate_interaction_zscores5.R index 25bea49e..6328f7a8 100644 --- a/workflow/apps/r/calculate_interaction_zscores5.R +++ b/workflow/apps/r/calculate_interaction_zscores5.R @@ -192,29 +192,29 @@ calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "c summary_stats <- df %>% group_by(across(all_of(group_vars))) %>% summarise(across(all_of(variables), list( - N = ~length(.), - mean = ~mean(., na.rm = TRUE), - median = ~median(., na.rm = TRUE), - max = ~max(., na.rm = TRUE), - min = ~min(., na.rm = TRUE), - sd = ~sd(., na.rm = TRUE) + N = ~length(na.omit(.)), # Exclude NA values from count + mean = ~mean(., na.rm = TRUE), # Mean ignoring NAs + median = ~median(., na.rm = TRUE), # Median ignoring NAs + max = ~ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)), # Handle groups where all values are NA + min = ~ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)), # Handle groups where all values are NA + sd = ~sd(., na.rm = TRUE) # Standard deviation ignoring NAs ), .names = "{.fn}_{.col}")) %>% mutate( - se_L = sd_L / sqrt(N_L - 1), - se_K = sd_K / sqrt(N_K - 1), - se_r = sd_r / sqrt(N_r - 1), - se_AUC = sd_AUC / sqrt(N_AUC - 1), - z_max_L = (max_L - mean_L) / sd_L, - z_max_K = (max_K - mean_K) / sd_K, - z_max_r = (max_r - mean_r) / sd_r, - z_max_AUC = (max_AUC - mean_AUC) / sd_AUC + se_L = ifelse(N_L > 1, sd_L / sqrt(N_L - 1), NA), # Standard error with check for division by zero + se_K = ifelse(N_K > 1, sd_K / sqrt(N_K - 1), NA), + se_r = ifelse(N_r > 1, sd_r / sqrt(N_r - 1), NA), + se_AUC = ifelse(N_AUC > 1, sd_AUC / sqrt(N_AUC - 1), NA), + z_max_L = ifelse(sd_L == 0, NA, (max_L - mean_L) / sd_L), # Avoid division by zero for Z-scores + z_max_K = ifelse(sd_K == 0, NA, (max_K - mean_K) / sd_K), + z_max_r = ifelse(sd_r == 0, NA, (max_r - mean_r) / sd_r), + z_max_AUC = ifelse(sd_AUC == 0, NA, (max_AUC - mean_AUC) / sd_AUC) ) return(summary_stats) } -calculate_interaction_scores <- function(df_ref, df, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) { +calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) { # Calculate total concentration variables total_conc_num <- length(unique(df$conc_num)) @@ -351,7 +351,7 @@ generate_plot <- function(df, x_var, y_var = NULL, plot_type, color_var = "conc_ "scatter" = plot + geom_point() + geom_smooth(method = "lm", se = FALSE), "box" = plot + geom_boxplot(), "density" = plot + geom_density(), - "bar" = plot + geom_bar(stat = "identity"), + "bar" = plot + geom_bar(), plot # Default: return the plot as is ) @@ -548,7 +548,7 @@ generate_plots <- function(df, x_var, y_vars, plot_type, color_var = "conc_num", } else if (plot_type == "density") { plot <- plot + geom_density() } else if (plot_type == "bar") { - plot <- plot + geom_bar(stat = "identity") + plot <- plot + geom_bar() } if (!is.null(x_label)) plot <- plot + xlab(x_label) @@ -657,9 +657,9 @@ main <- function() { # Filter data within and outside 2SD within_2sd_k <- stats_joined %>% - filter(K >= (K_mean - 2 * K_sd) & K <= (K_mean + 2 * K_sd)) + filter(K >= (mean_K - 2 * sd_K) & K <= (mean_K + 2 * sd_K)) outside_2sd_k <- stats_joined %>% - filter(K < (K_mean - 2 * K_sd) | K > (K_mean + 2 * K_sd)) + filter(K < (mean_K - 2 * sd_K) | K > (mean_K + 2 * sd_K)) # Summary statistics for within and outside 2SD of K l_within_2sd_k <- calculate_summary_stats(within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor")) @@ -730,8 +730,8 @@ main <- function() { # Calculate interactions variables <- c("L", "K", "r", "AUC") # We are recalculating some of the data here - reference_results <- calculate_interaction_scores(stats_joined, reference_strain, max_conc, variables) - deletion_results <- calculate_interaction_scores(stats_joined, deletion_strains, max_conc, variables) + reference_results <- calculate_interaction_scores(reference_strain, max_conc, variables) + deletion_results <- calculate_interaction_scores(deletion_strains, max_conc, variables) zscores_calculations_reference <- reference_results$zscores_calculations zscores_interactions_reference <- reference_results$zscores_interactions