|
@@ -168,10 +168,10 @@ calculate_summary_stats <- function(df, variables, group_vars) {
|
|
|
across(all_of(variables), list(
|
|
|
mean = ~mean(., na.rm = TRUE),
|
|
|
median = ~median(., na.rm = TRUE),
|
|
|
- max = ~max(., na.rm = TRUE),
|
|
|
- min = ~min(., na.rm = TRUE),
|
|
|
+ max = ~ ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)),
|
|
|
+ min = ~ ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)),
|
|
|
sd = ~sd(., na.rm = TRUE),
|
|
|
- se = ~sd(., na.rm = TRUE) / sqrt(N) # Corrected SE calculation
|
|
|
+ se = ~sd(., na.rm = TRUE) / sqrt(N) - 1 # TODO: explain the "- 1"; as written this evaluates as (sd / sqrt(N)) - 1, not sd / sqrt(N - 1) -- confirm which is intended
|
|
|
), .names = "{.fn}_{.col}"),
|
|
|
.groups = "drop"
|
|
|
)
|
|
@@ -331,7 +331,7 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars) {
|
|
|
# Declare column order for output
|
|
|
calculations <- stats %>%
|
|
|
select(
|
|
|
- "OrfRep", "Gene", "conc_num", "conc_num_factor", "N",
|
|
|
+ "OrfRep", "Gene", "num", "conc_num", "conc_num_factor", "N",
|
|
|
"mean_L", "mean_K", "mean_r", "mean_AUC",
|
|
|
"median_L", "median_K", "median_r", "median_AUC",
|
|
|
"sd_L", "sd_K", "sd_r", "sd_AUC",
|
|
@@ -932,7 +932,7 @@ filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FAL
|
|
|
|
|
|
# Adjust NAs to .001 for linear model
|
|
|
if (adjust) {
|
|
|
- if (verbose) message("Replacing NA with 0.001 for Avg_Zscore_ and Z_lm_ columns.")
|
|
|
+ if (verbose) message("Replacing NA with 0.001 for Avg_Zscore_ and Z_lm_ columns")
|
|
|
df <- df %>%
|
|
|
mutate(
|
|
|
across(all_of(avg_zscore_cols), ~ ifelse(is.na(.), 0.001, .)),
|
|
@@ -942,14 +942,12 @@ filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FAL
|
|
|
|
|
|
# Filter non-finite values
|
|
|
if (nf) {
|
|
|
- if (verbose) message("Filtering non-finite values for variables: ", paste(variables, collapse = ", "))
|
|
|
-
|
|
|
non_finite_df <- df %>%
|
|
|
filter(if_any(all_of(variables), ~ !is.finite(.)))
|
|
|
|
|
|
if (verbose && nrow(non_finite_df) > 0) {
|
|
|
- message("Filtering non-finite rows for variables ", paste(variables, collapse = ", "), ":")
|
|
|
- print(non_finite_df)
|
|
|
+ message("Filtering non-finite rows for variable(s) ", paste(variables, collapse = ", "), ":")
|
|
|
+ print(non_finite_df %>% select(all_of(c("scan", "Plate", "Row", "Col", "num", "conc_num", variables))), n = 30)
|
|
|
}
|
|
|
|
|
|
df <- df %>%
|
|
@@ -958,18 +956,15 @@ filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FAL
|
|
|
|
|
|
# Filter missing values
|
|
|
if (missing) {
|
|
|
- if (verbose) message("Filtering missing values for variables: ", paste(variables, collapse = ", "))
|
|
|
|
|
|
- # Identify missing rows for logging
|
|
|
missing_df <- df %>%
|
|
|
filter(if_any(all_of(variables), ~ is.na(.)))
|
|
|
|
|
|
if (verbose && nrow(missing_df) > 0) {
|
|
|
- message("Missing data for variables ", paste(variables, collapse = ", "), ":")
|
|
|
- print(missing_df)
|
|
|
+ message("Filtering missing data for variable(s) ", paste(variables, collapse = ", "), ":")
|
|
|
+ print(missing_df %>% select(all_of(c("scan", "Plate", "Row", "Col", "num", "conc_num", variables))), n = 30)
|
|
|
}
|
|
|
|
|
|
- # Keep only rows where all specified variables are not missing
|
|
|
df <- df %>%
|
|
|
filter(if_all(all_of(variables), ~ !is.na(.)))
|
|
|
}
|
|
@@ -980,14 +975,13 @@ filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FAL
|
|
|
if (variable %in% variables) {
|
|
|
ylim_vals <- limits_map[[variable]]
|
|
|
|
|
|
- if (verbose) message("Applying limits for variable ", variable, ": [", ylim_vals[1], ", ", ylim_vals[2], "].")
|
|
|
-
|
|
|
out_of_range_df <- df %>%
|
|
|
filter(.data[[variable]] < ylim_vals[1] | .data[[variable]] > ylim_vals[2])
|
|
|
|
|
|
if (verbose && nrow(out_of_range_df) > 0) {
|
|
|
- message("Out-of-range data for variable ", variable, ":")
|
|
|
- print(out_of_range_df)
|
|
|
+ message("Applying limits for variable ", variable, ": [", ylim_vals[1], ", ", ylim_vals[2], "].")
|
|
|
+ message("Filtering out-of-range data for variable ", variable, ":")
|
|
|
+ print(out_of_range_df %>% select(all_of(c("scan", "Plate", "Row", "Col", "num", "conc_num", variables))), n = 30)
|
|
|
}
|
|
|
|
|
|
df <- df %>%
|
|
@@ -998,7 +992,7 @@ filter_data <- function(df, variables, nf = FALSE, missing = FALSE, adjust = FAL
|
|
|
|
|
|
# Calculate Rank Columns if 'rank' is TRUE
|
|
|
if (rank) {
|
|
|
- if (verbose) message("Calculating rank columns for variables: ", paste(variables, collapse = ", "))
|
|
|
+ if (verbose) message("Calculating ranks for variable(s): ", paste(variables, collapse = ", "))
|
|
|
|
|
|
for (col in avg_zscore_cols) {
|
|
|
rank_col <- paste0("Rank_", col)
|