Replace existing summary stats in df
This commit is contained in:
@@ -195,20 +195,22 @@ calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "c
|
|||||||
min = ~ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)),
|
min = ~ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)),
|
||||||
sd = ~sd(., na.rm = TRUE),
|
sd = ~sd(., na.rm = TRUE),
|
||||||
se = ~ifelse(sum(!is.na(.)) > 1, sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)) - 1), NA)
|
se = ~ifelse(sum(!is.na(.)) > 1, sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)) - 1), NA)
|
||||||
# TODO: z_max is not in the original stats; would it be better to compute it here than in calculate_interactions?
|
|
||||||
# z_max = ~ifelse(sd(., na.rm = TRUE) == 0 | all(is.na(.)), NA,
|
|
||||||
# (max(., na.rm = TRUE) - mean(., na.rm = TRUE)) / sd(., na.rm = TRUE)) # Z-score
|
|
||||||
), .names = "{.fn}_{.col}")
|
), .names = "{.fn}_{.col}")
|
||||||
)
|
)
|
||||||
|
|
||||||
# Join the summary stats back to the original dataframe
|
# Get the column names from the summary_stats dataframe (excluding the group_vars)
|
||||||
df_with_stats <- left_join(df, summary_stats, by = group_vars)
|
stat_columns <- setdiff(names(summary_stats), group_vars)
|
||||||
|
|
||||||
|
# Remove existing stats columns from df if they already exist
|
||||||
|
df_cleaned <- df %>% select(-any_of(stat_columns))
|
||||||
|
|
||||||
|
# Join the summary stats back to the cleaned original dataframe
|
||||||
|
df_with_stats <- left_join(df_cleaned, summary_stats, by = group_vars)
|
||||||
|
|
||||||
# Return both the summary stats and the updated dataframe
|
# Return both the summary stats and the updated dataframe
|
||||||
return(list(summary_stats = summary_stats, df_with_stats = df_with_stats))
|
return(list(summary_stats = summary_stats, df_with_stats = df_with_stats))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# Calculate interaction scores
|
# Calculate interaction scores
|
||||||
calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) {
|
calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) {
|
||||||
|
|
||||||
@@ -651,9 +653,9 @@ main <- function() {
|
|||||||
|
|
||||||
# Filter data within and outside 2SD
|
# Filter data within and outside 2SD
|
||||||
message("Filtering by 2SD of K")
|
message("Filtering by 2SD of K")
|
||||||
df_na_within_2sd_k <- df_na %>%
|
df_na_within_2sd_k <- df_na_stats %>%
|
||||||
filter(K >= (mean_K - 2 * sd_K) & K <= (mean_K + 2 * sd_K))
|
filter(K >= (mean_K - 2 * sd_K) & K <= (mean_K + 2 * sd_K))
|
||||||
df_na_outside_2sd_k <- df_na %>%
|
df_na_outside_2sd_k <- df_na_stats %>%
|
||||||
filter(K < (mean_K - 2 * sd_K) | K > (mean_K + 2 * sd_K))
|
filter(K < (mean_K - 2 * sd_K) | K > (mean_K + 2 * sd_K))
|
||||||
|
|
||||||
# Summary statistics for within and outside 2SD of K
|
# Summary statistics for within and outside 2SD of K
|
||||||
|
|||||||
Reference in New Issue
Block a user