|
@@ -155,9 +155,7 @@ update_gene_names <- function(df, sgd_gene_list) {
|
|
return(df)
|
|
return(df)
|
|
}
|
|
}
|
|
|
|
|
|
-# Calculate summary statistics for all variables
|
|
|
|
calculate_summary_stats <- function(df, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor")) {
|
|
calculate_summary_stats <- function(df, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor")) {
|
|
-
|
|
|
|
# Summarize the variables within the grouped data
|
|
# Summarize the variables within the grouped data
|
|
summary_stats <- df %>%
|
|
summary_stats <- df %>%
|
|
group_by(across(all_of(group_vars))) %>%
|
|
group_by(across(all_of(group_vars))) %>%
|
|
@@ -166,19 +164,18 @@ calculate_summary_stats <- function(df, variables, group_vars = c("OrfRep", "con
|
|
across(all_of(variables), list(
|
|
across(all_of(variables), list(
|
|
mean = ~mean(., na.rm = TRUE),
|
|
mean = ~mean(., na.rm = TRUE),
|
|
median = ~median(., na.rm = TRUE),
|
|
median = ~median(., na.rm = TRUE),
|
|
- max = ~ ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)),
|
|
|
|
- min = ~ ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)),
|
|
|
|
|
|
+ max = ~ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)),
|
|
|
|
+ min = ~ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)),
|
|
sd = ~sd(., na.rm = TRUE),
|
|
sd = ~sd(., na.rm = TRUE),
|
|
- se = ~ ifelse(all(is.na(.)), NA, sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)) - 1))
|
|
|
|
|
|
+ se = ~ifelse(all(is.na(.)), NA, sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)) - 1))
|
|
), .names = "{.fn}_{.col}")
|
|
), .names = "{.fn}_{.col}")
|
|
)
|
|
)
|
|
|
|
|
|
- # print(summary_stats)
|
|
|
|
-
|
|
|
|
- # Prevent .x and .y suffix issues by renaming columns
|
|
|
|
|
|
+ # Create a cleaned version of df that doesn't overlap with summary_stats
|
|
|
|
+ cols_to_exclude <- unique(c(variables, group_vars))
|
|
df_cleaned <- df %>%
|
|
df_cleaned <- df %>%
|
|
- select(-any_of(setdiff(names(summary_stats), group_vars))) # Avoid duplicate columns in the final join
|
|
|
|
-
|
|
|
|
|
|
+ select(-any_of(cols_to_exclude))
|
|
|
|
+
|
|
# Join the stats back to the original dataframe
|
|
# Join the stats back to the original dataframe
|
|
df_with_stats <- left_join(df_cleaned, summary_stats, by = group_vars)
|
|
df_with_stats <- left_join(df_cleaned, summary_stats, by = group_vars)
|
|
|
|
|
|
@@ -205,22 +202,8 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
|
|
AUC = df %>% filter(conc_num_factor == 0) %>% pull(sd_AUC) %>% first()
|
|
AUC = df %>% filter(conc_num_factor == 0) %>% pull(sd_AUC) %>% first()
|
|
)
|
|
)
|
|
|
|
|
|
- stats <- df %>%
|
|
|
|
- group_by(OrfRep, Gene, num, conc_num, conc_num_factor) %>%
|
|
|
|
- summarise(
|
|
|
|
- N = sum(!is.na(L)),
|
|
|
|
- NG = sum(NG, na.rm = TRUE),
|
|
|
|
- DB = sum(DB, na.rm = TRUE),
|
|
|
|
- SM = sum(SM, na.rm = TRUE),
|
|
|
|
- across(all_of(variables), list(
|
|
|
|
- mean = ~mean(., na.rm = TRUE),
|
|
|
|
- median = ~median(., na.rm = TRUE),
|
|
|
|
- max = ~ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)),
|
|
|
|
- min = ~ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)),
|
|
|
|
- sd = ~sd(., na.rm = TRUE),
|
|
|
|
- se = ~ifelse(sum(!is.na(.)) > 1, sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)) - 1), NA)
|
|
|
|
- ), .names = "{.fn}_{.col}")
|
|
|
|
- )
|
|
|
|
|
|
+ stats <- calculate_summary_stats(df, variables,
|
|
|
|
+ group_vars = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))$summary_stats
|
|
|
|
|
|
stats <- df %>%
|
|
stats <- df %>%
|
|
group_by(OrfRep, Gene, num) %>%
|
|
group_by(OrfRep, Gene, num) %>%
|
|
@@ -873,14 +856,6 @@ filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FAL
|
|
avg_zscore_cols <- paste0("Avg_Zscore_", variables)
|
|
avg_zscore_cols <- paste0("Avg_Zscore_", variables)
|
|
z_lm_cols <- paste0("Z_lm_", variables)
|
|
z_lm_cols <- paste0("Z_lm_", variables)
|
|
|
|
|
|
- # # Optional: Validate that the expected columns exist in the dataframe
|
|
|
|
- # expected_cols <- c(avg_zscore_cols, z_lm_cols, variables)
|
|
|
|
- # missing_cols <- setdiff(expected_cols, names(df))
|
|
|
|
- # if (length(missing_cols) > 0) {
|
|
|
|
- # stop("The following expected columns are missing from the dataframe: ",
|
|
|
|
- # paste(missing_cols, collapse = ", "))
|
|
|
|
- # }
|
|
|
|
-
|
|
|
|
# Adjust NAs if 'adjust' is TRUE
|
|
# Adjust NAs if 'adjust' is TRUE
|
|
if (adjust) {
|
|
if (adjust) {
|
|
if (verbose) message("Replacing NA with 0.001 for Avg_Zscore_ and Z_lm_ columns.")
|
|
if (verbose) message("Replacing NA with 0.001 for Avg_Zscore_ and Z_lm_ columns.")
|