Fix sd vars

This commit is contained in:
2024-09-04 22:16:38 -04:00
parent 464fef2cfc
commit 58fd7453ce

View File

@@ -192,29 +192,29 @@ calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "c
summary_stats <- df %>% summary_stats <- df %>%
group_by(across(all_of(group_vars))) %>% group_by(across(all_of(group_vars))) %>%
summarise(across(all_of(variables), list( summarise(across(all_of(variables), list(
N = ~length(.), N = ~length(na.omit(.)), # Exclude NA values from count
mean = ~mean(., na.rm = TRUE), mean = ~mean(., na.rm = TRUE), # Mean ignoring NAs
median = ~median(., na.rm = TRUE), median = ~median(., na.rm = TRUE), # Median ignoring NAs
max = ~max(., na.rm = TRUE), max = ~ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)), # Handle groups where all values are NA
min = ~min(., na.rm = TRUE), min = ~ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)), # Handle groups where all values are NA
sd = ~sd(., na.rm = TRUE) sd = ~sd(., na.rm = TRUE) # Standard deviation ignoring NAs
), .names = "{.fn}_{.col}")) %>% ), .names = "{.fn}_{.col}")) %>%
mutate( mutate(
se_L = sd_L / sqrt(N_L - 1), se_L = ifelse(N_L > 1, sd_L / sqrt(N_L - 1), NA), # Standard error with check for division by zero
se_K = sd_K / sqrt(N_K - 1), se_K = ifelse(N_K > 1, sd_K / sqrt(N_K - 1), NA),
se_r = sd_r / sqrt(N_r - 1), se_r = ifelse(N_r > 1, sd_r / sqrt(N_r - 1), NA),
se_AUC = sd_AUC / sqrt(N_AUC - 1), se_AUC = ifelse(N_AUC > 1, sd_AUC / sqrt(N_AUC - 1), NA),
z_max_L = (max_L - mean_L) / sd_L, z_max_L = ifelse(sd_L == 0, NA, (max_L - mean_L) / sd_L), # Avoid division by zero for Z-scores
z_max_K = (max_K - mean_K) / sd_K, z_max_K = ifelse(sd_K == 0, NA, (max_K - mean_K) / sd_K),
z_max_r = (max_r - mean_r) / sd_r, z_max_r = ifelse(sd_r == 0, NA, (max_r - mean_r) / sd_r),
z_max_AUC = (max_AUC - mean_AUC) / sd_AUC z_max_AUC = ifelse(sd_AUC == 0, NA, (max_AUC - mean_AUC) / sd_AUC)
) )
return(summary_stats) return(summary_stats)
} }
calculate_interaction_scores <- function(df_ref, df, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) { calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) {
# Calculate total concentration variables # Calculate total concentration variables
total_conc_num <- length(unique(df$conc_num)) total_conc_num <- length(unique(df$conc_num))
@@ -351,7 +351,7 @@ generate_plot <- function(df, x_var, y_var = NULL, plot_type, color_var = "conc_
"scatter" = plot + geom_point() + geom_smooth(method = "lm", se = FALSE), "scatter" = plot + geom_point() + geom_smooth(method = "lm", se = FALSE),
"box" = plot + geom_boxplot(), "box" = plot + geom_boxplot(),
"density" = plot + geom_density(), "density" = plot + geom_density(),
"bar" = plot + geom_bar(stat = "identity"), "bar" = plot + geom_bar(),
plot # Default: return the plot as is plot # Default: return the plot as is
) )
@@ -548,7 +548,7 @@ generate_plots <- function(df, x_var, y_vars, plot_type, color_var = "conc_num",
} else if (plot_type == "density") { } else if (plot_type == "density") {
plot <- plot + geom_density() plot <- plot + geom_density()
} else if (plot_type == "bar") { } else if (plot_type == "bar") {
plot <- plot + geom_bar(stat = "identity") plot <- plot + geom_bar()
} }
if (!is.null(x_label)) plot <- plot + xlab(x_label) if (!is.null(x_label)) plot <- plot + xlab(x_label)
@@ -657,9 +657,9 @@ main <- function() {
# Filter data within and outside 2SD # Filter data within and outside 2SD
within_2sd_k <- stats_joined %>% within_2sd_k <- stats_joined %>%
filter(K >= (K_mean - 2 * K_sd) & K <= (K_mean + 2 * K_sd)) filter(K >= (mean_K - 2 * sd_K) & K <= (mean_K + 2 * sd_K))
outside_2sd_k <- stats_joined %>% outside_2sd_k <- stats_joined %>%
filter(K < (K_mean - 2 * K_sd) | K > (K_mean + 2 * K_sd)) filter(K < (mean_K - 2 * sd_K) | K > (mean_K + 2 * sd_K))
# Summary statistics for within and outside 2SD of K # Summary statistics for within and outside 2SD of K
l_within_2sd_k <- calculate_summary_stats(within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor")) l_within_2sd_k <- calculate_summary_stats(within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
@@ -730,8 +730,8 @@ main <- function() {
# Calculate interactions # Calculate interactions
variables <- c("L", "K", "r", "AUC") variables <- c("L", "K", "r", "AUC")
# We are recalculating some of the data here # We are recalculating some of the data here
reference_results <- calculate_interaction_scores(stats_joined, reference_strain, max_conc, variables) reference_results <- calculate_interaction_scores(reference_strain, max_conc, variables)
deletion_results <- calculate_interaction_scores(stats_joined, deletion_strains, max_conc, variables) deletion_results <- calculate_interaction_scores(deletion_strains, max_conc, variables)
zscores_calculations_reference <- reference_results$zscores_calculations zscores_calculations_reference <- reference_results$zscores_calculations
zscores_interactions_reference <- reference_results$zscores_interactions zscores_interactions_reference <- reference_results$zscores_interactions