Suppress some interaction score warnings

This commit is contained in:
2024-09-12 22:34:24 -04:00
parent 54b32f47c7
commit 4b2157df5f

View File

@@ -275,25 +275,60 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
) )
# Create linear models with error handling for missing/insufficient data # Create linear models with error handling for missing/insufficient data
# This part is a PITA so best to contain it in its own function
calculate_lm_values <- function(y, x) {
if (length(unique(x)) > 1 && sum(!is.na(y)) > 1) {
# Suppress warnings only for perfect fits or similar issues
model <- suppressWarnings(lm(y ~ x))
coefficients <- coef(model)
r_squared <- tryCatch({
summary(model)$r.squared
}, warning = function(w) {
NA # Set r-squared to NA if there's a warning
})
return(list(intercept = coefficients[1], slope = coefficients[2], r_squared = r_squared))
} else {
return(list(intercept = NA, slope = NA, r_squared = NA))
}
}
lms <- stats %>% lms <- stats %>%
summarise( group_by(across(all_of(group_vars))) %>%
lm_L = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_L)) > 1) list(lm(Delta_L ~ conc_num_factor)) else NA, reframe(
lm_K = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_K)) > 1) list(lm(Delta_K ~ conc_num_factor)) else NA, lm_L = list(calculate_lm_values(Delta_L, conc_num_factor)),
lm_r = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_r)) > 1) list(lm(Delta_r ~ conc_num_factor)) else NA, lm_K = list(calculate_lm_values(Delta_K, conc_num_factor)),
lm_AUC = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_AUC)) > 1) list(lm(Delta_AUC ~ conc_num_factor)) else NA lm_r = list(calculate_lm_values(Delta_r, conc_num_factor)),
lm_AUC = list(calculate_lm_values(Delta_AUC, conc_num_factor))
) )
lms <- lms %>%
mutate(
lm_L_intercept = sapply(lm_L, `[[`, "intercept"),
lm_L_slope = sapply(lm_L, `[[`, "slope"),
lm_L_r_squared = sapply(lm_L, `[[`, "r_squared"),
lm_K_intercept = sapply(lm_K, `[[`, "intercept"),
lm_K_slope = sapply(lm_K, `[[`, "slope"),
lm_K_r_squared = sapply(lm_K, `[[`, "r_squared"),
lm_r_intercept = sapply(lm_r, `[[`, "intercept"),
lm_r_slope = sapply(lm_r, `[[`, "slope"),
lm_r_r_squared = sapply(lm_r, `[[`, "r_squared"),
lm_AUC_intercept = sapply(lm_AUC, `[[`, "intercept"),
lm_AUC_slope = sapply(lm_AUC, `[[`, "slope"),
lm_AUC_r_squared = sapply(lm_AUC, `[[`, "r_squared")
) %>%
select(-lm_L, -lm_K, -lm_r, -lm_AUC)
stats <- stats %>% stats <- stats %>%
left_join(lms, by = group_vars) %>% left_join(lms, by = group_vars) %>%
mutate( mutate(
lm_Score_L = lapply(lm_L, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA), lm_Score_L = lm_L_slope * max_conc + lm_L_intercept,
lm_Score_K = lapply(lm_K, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA), lm_Score_K = lm_K_slope * max_conc + lm_K_intercept,
lm_Score_r = lapply(lm_r, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA), lm_Score_r = lm_r_slope * max_conc + lm_r_intercept,
lm_Score_AUC = lapply(lm_AUC, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA), lm_Score_AUC = lm_AUC_slope * max_conc + lm_AUC_intercept,
R_Squared_L = lapply(lm_L, function(model) if (!is.null(model)) summary(model)$r.squared else NA), R_Squared_L = lm_L_r_squared,
R_Squared_K = lapply(lm_K, function(model) if (!is.null(model)) summary(model)$r.squared else NA), R_Squared_K = lm_K_r_squared,
R_Squared_r = lapply(lm_r, function(model) if (!is.null(model)) summary(model)$r.squared else NA), R_Squared_r = lm_r_r_squared,
R_Squared_AUC = lapply(lm_AUC, function(model) if (!is.null(model)) summary(model)$r.squared else NA), R_Squared_AUC = lm_AUC_r_squared,
Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE), Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE),
Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE), Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE),
Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE), Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE),
@@ -302,10 +337,6 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c
stats <- stats %>% stats <- stats %>%
mutate( mutate(
lm_Score_L = unlist(lm_Score_L),
lm_Score_K = unlist(lm_Score_K),
lm_Score_r = unlist(lm_Score_r),
lm_Score_AUC = unlist(lm_Score_AUC),
Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs, Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs,
Avg_Zscore_K = Sum_Zscore_K / num_non_removed_concs, Avg_Zscore_K = Sum_Zscore_K / num_non_removed_concs,
Avg_Zscore_r = Sum_Zscore_r / (total_conc_num - 1), Avg_Zscore_r = Sum_Zscore_r / (total_conc_num - 1),
@@ -356,8 +387,8 @@ generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_la
plots <- lapply(plot_configs, function(config) { plots <- lapply(plot_configs, function(config) {
df <- config$df df <- config$df
print(df %>% select(any_of(c("OrfRep", "Plate", "scan", "Col", "Row", "num", "OrfRep", "conc_num", "conc_num_factor", # print(df %>% select(any_of(c("OrfRep", "Plate", "scan", "Col", "Row", "num", "OrfRep", "conc_num", "conc_num_factor",
"delta_bg_tolerance", "delta_bg", "Gene", "L", "K", "r", "AUC", "NG", "DB"))), n = 100) # "delta_bg_tolerance", "delta_bg", "Gene", "L", "K", "r", "AUC", "NG", "DB"))), n = 5)
# Define aes mapping based on the presence of y_var # Define aes mapping based on the presence of y_var
aes_mapping <- if (is.null(config$y_var)) { aes_mapping <- if (is.null(config$y_var)) {
@@ -785,7 +816,7 @@ main <- function() {
file = file.path(out_dir, "max_observed_L_vals_for_spots_outside_2sd_K.csv"), row.names = FALSE) file = file.path(out_dir, "max_observed_L_vals_for_spots_outside_2sd_K.csv"), row.names = FALSE)
# Each plots list corresponds to a file # Each plots list corresponds to a file
message("Generating QC plot configurations") message("Generating quality control plot configurations")
l_vs_k_plots <- list( l_vs_k_plots <- list(
list( list(
df = df, df = df,
@@ -947,7 +978,7 @@ main <- function() {
) )
) )
message("Generating QC plots") message("Generating quality control plots")
generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots) generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots)
generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots) generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots)
generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots) generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots)
@@ -1046,8 +1077,10 @@ main <- function() {
write.csv(zscores_interactions, file = file.path(out_dir, "ZScores_Interaction.csv"), row.names = FALSE) write.csv(zscores_interactions, file = file.path(out_dir, "ZScores_Interaction.csv"), row.names = FALSE)
# Create interaction plots # Create interaction plots
message("Generating interaction plot configurations")
reference_plot_configs <- generate_interaction_plot_configs(df_reference, interaction_vars) reference_plot_configs <- generate_interaction_plot_configs(df_reference, interaction_vars)
deletion_plot_configs <- generate_interaction_plot_configs(df_deletion, interaction_vars) deletion_plot_configs <- generate_interaction_plot_configs(df_deletion, interaction_vars)
message("Generating interaction plots")
generate_and_save_plots(out_dir, "RF_interactionPlots", reference_plot_configs, grid_layout = list(ncol = 4, nrow = 3)) generate_and_save_plots(out_dir, "RF_interactionPlots", reference_plot_configs, grid_layout = list(ncol = 4, nrow = 3))
generate_and_save_plots(out_dir, "InteractionPlots", deletion_plot_configs, grid_layout = list(ncol = 4, nrow = 3)) generate_and_save_plots(out_dir, "InteractionPlots", deletion_plot_configs, grid_layout = list(ncol = 4, nrow = 3))