Use a single N value
This commit is contained in:
@@ -133,7 +133,6 @@ load_and_process_data <- function(easy_results_file, sd = 3) {
|
|||||||
return(df)
|
return(df)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# Update Gene names using the SGD gene list
|
# Update Gene names using the SGD gene list
|
||||||
update_gene_names <- function(df, sgd_gene_list) {
|
update_gene_names <- function(df, sgd_gene_list) {
|
||||||
# Load SGD gene list
|
# Load SGD gene list
|
||||||
@@ -184,21 +183,20 @@ process_strains <- function(df) {
|
|||||||
|
|
||||||
# Calculate summary statistics for all variables
|
# Calculate summary statistics for all variables
|
||||||
calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "conc_num_factor")) {
|
calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "conc_num_factor")) {
|
||||||
# Generate summary statistics
|
|
||||||
summary_stats <- df %>%
|
summary_stats <- df %>%
|
||||||
group_by(across(all_of(group_vars))) %>%
|
group_by(across(all_of(group_vars))) %>%
|
||||||
reframe(across(all_of(variables), list(
|
reframe(across(all_of(variables), list(
|
||||||
N = ~sum(!is.na(.)), # Count of non-NA values
|
mean = ~mean(., na.rm = TRUE),
|
||||||
mean = ~mean(., na.rm = TRUE), # Mean ignoring NAs
|
median = ~median(., na.rm = TRUE),
|
||||||
median = ~median(., na.rm = TRUE), # Median ignoring NAs
|
max = ~ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)),
|
||||||
max = ~ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)), # Return NA if all values are NA
|
min = ~ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)),
|
||||||
min = ~ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)), # Return NA if all values are NA
|
sd = ~sd(., na.rm = TRUE),
|
||||||
sd = ~sd(., na.rm = TRUE), # Standard deviation ignoring NAs
|
se = ~ifelse(sum(!is.na(.)) > 1, sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)) - 1), NA)
|
||||||
se = ~ifelse(N > 1, sd(., na.rm = TRUE) / sqrt(N - 1), NA) # Standard Error using precomputed N
|
|
||||||
# TODO: not in original stats but better to do here than in calculate_interactions?
|
# TODO: not in original stats but better to do here than in calculate_interactions?
|
||||||
# z_max = ~ifelse(sd(., na.rm = TRUE) == 0 | all(is.na(.)), NA,
|
# z_max = ~ifelse(sd(., na.rm = TRUE) == 0 | all(is.na(.)), NA,
|
||||||
# (max(., na.rm = TRUE) - mean(., na.rm = TRUE)) / sd(., na.rm = TRUE)) # Z-score
|
# (max(., na.rm = TRUE) - mean(., na.rm = TRUE)) / sd(., na.rm = TRUE)) # Z-score
|
||||||
), .names = "{.fn}_{.col}"))
|
), .names = "{.fn}_{.col}")) %>%
|
||||||
|
mutate(N = ~sum(!is.na(L))) # count of non-NA L values
|
||||||
|
|
||||||
# Join the summary stats back to the original dataframe
|
# Join the summary stats back to the original dataframe
|
||||||
df_with_stats <- left_join(df, summary_stats, by = group_vars)
|
df_with_stats <- left_join(df, summary_stats, by = group_vars)
|
||||||
@@ -207,7 +205,7 @@ calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "c
|
|||||||
return(list(summary_stats = summary_stats, df_with_stats = df_with_stats))
|
return(list(summary_stats = summary_stats, df_with_stats = df_with_stats))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Calculate interaction scores
|
||||||
calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) {
|
calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) {
|
||||||
|
|
||||||
# Calculate total concentration variables
|
# Calculate total concentration variables
|
||||||
@@ -691,6 +689,9 @@ main <- function() {
|
|||||||
file = file.path(out_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")),
|
file = file.path(out_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")),
|
||||||
row.names = FALSE)
|
row.names = FALSE)
|
||||||
|
|
||||||
|
print("Background summary stats:")
|
||||||
|
print(head(summary_stats_bg))
|
||||||
|
|
||||||
# Filter reference and deletion strains
|
# Filter reference and deletion strains
|
||||||
# Formerly X2_RF (reference strain)
|
# Formerly X2_RF (reference strain)
|
||||||
df_reference <- df_bg_stats %>%
|
df_reference <- df_bg_stats %>%
|
||||||
|
|||||||
Reference in New Issue
Block a user