diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 69401a59..a5a783b6 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -275,25 +275,60 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c ) # Create linear models with error handling for missing/insufficient data + # This part is a PITA so best to contain it in its own function + calculate_lm_values <- function(y, x) { + if (length(unique(x)) > 1 && sum(!is.na(y)) > 1) { + # Suppress warnings only for perfect fits or similar issues + model <- suppressWarnings(lm(y ~ x)) + coefficients <- coef(model) + r_squared <- tryCatch({ + summary(model)$r.squared + }, warning = function(w) { + NA # Set r-squared to NA if there's a warning + }) + return(list(intercept = coefficients[1], slope = coefficients[2], r_squared = r_squared)) + } else { + return(list(intercept = NA, slope = NA, r_squared = NA)) + } + } + lms <- stats %>% - summarise( - lm_L = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_L)) > 1) list(lm(Delta_L ~ conc_num_factor)) else NA, - lm_K = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_K)) > 1) list(lm(Delta_K ~ conc_num_factor)) else NA, - lm_r = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_r)) > 1) list(lm(Delta_r ~ conc_num_factor)) else NA, - lm_AUC = if (n_distinct(conc_num_factor) > 1 && sum(!is.na(Delta_AUC)) > 1) list(lm(Delta_AUC ~ conc_num_factor)) else NA + group_by(across(all_of(group_vars))) %>% + reframe( + lm_L = list(calculate_lm_values(Delta_L, conc_num_factor)), + lm_K = list(calculate_lm_values(Delta_K, conc_num_factor)), + lm_r = list(calculate_lm_values(Delta_r, conc_num_factor)), + lm_AUC = list(calculate_lm_values(Delta_AUC, conc_num_factor)) ) + lms <- lms %>% + mutate( + lm_L_intercept = sapply(lm_L, `[[`, "intercept"), + lm_L_slope = sapply(lm_L, `[[`, "slope"), + lm_L_r_squared = sapply(lm_L, `[[`, "r_squared"), + lm_K_intercept = sapply(lm_K, `[[`, "intercept"), + lm_K_slope = sapply(lm_K, `[[`, "slope"), + lm_K_r_squared = sapply(lm_K, `[[`, "r_squared"), + lm_r_intercept = sapply(lm_r, `[[`, "intercept"), + lm_r_slope = sapply(lm_r, `[[`, "slope"), + lm_r_r_squared = sapply(lm_r, `[[`, "r_squared"), + lm_AUC_intercept = sapply(lm_AUC, `[[`, "intercept"), + lm_AUC_slope = sapply(lm_AUC, `[[`, "slope"), + lm_AUC_r_squared = sapply(lm_AUC, `[[`, "r_squared") + ) %>% + select(-lm_L, -lm_K, -lm_r, -lm_AUC) + stats <- stats %>% left_join(lms, by = group_vars) %>% mutate( - lm_Score_L = lapply(lm_L, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA), - lm_Score_K = lapply(lm_K, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA), - lm_Score_r = lapply(lm_r, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA), - lm_Score_AUC = lapply(lm_AUC, function(model) if (!is.null(model)) coef(model)[2] * max_conc + coef(model)[1] else NA), - R_Squared_L = lapply(lm_L, function(model) if (!is.null(model)) summary(model)$r.squared else NA), - R_Squared_K = lapply(lm_K, function(model) if (!is.null(model)) summary(model)$r.squared else NA), - R_Squared_r = lapply(lm_r, function(model) if (!is.null(model)) summary(model)$r.squared else NA), - R_Squared_AUC = lapply(lm_AUC, function(model) if (!is.null(model)) summary(model)$r.squared else NA), + lm_Score_L = lm_L_slope * max_conc + lm_L_intercept, + lm_Score_K = lm_K_slope * max_conc + lm_K_intercept, + lm_Score_r = lm_r_slope * max_conc + lm_r_intercept, + lm_Score_AUC = lm_AUC_slope * max_conc + lm_AUC_intercept, + R_Squared_L = lm_L_r_squared, + R_Squared_K = lm_K_r_squared, + R_Squared_r = lm_r_r_squared, + R_Squared_AUC = lm_AUC_r_squared, Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE), Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE), Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE), @@ -302,10 +337,6 @@ calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c stats <- stats %>% mutate( - lm_Score_L = unlist(lm_Score_L), - lm_Score_K = unlist(lm_Score_K), - lm_Score_r = unlist(lm_Score_r), - lm_Score_AUC = unlist(lm_Score_AUC), Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs, Avg_Zscore_K = Sum_Zscore_K / num_non_removed_concs, Avg_Zscore_r = Sum_Zscore_r / (total_conc_num - 1), @@ -356,8 +387,8 @@ generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_la plots <- lapply(plot_configs, function(config) { df <- config$df - print(df %>% select(any_of(c("OrfRep", "Plate", "scan", "Col", "Row", "num", "OrfRep", "conc_num", "conc_num_factor", - "delta_bg_tolerance", "delta_bg", "Gene", "L", "K", "r", "AUC", "NG", "DB"))), n = 100) + # print(df %>% select(any_of(c("OrfRep", "Plate", "scan", "Col", "Row", "num", "OrfRep", "conc_num", "conc_num_factor", + # "delta_bg_tolerance", "delta_bg", "Gene", "L", "K", "r", "AUC", "NG", "DB"))), n = 5) # Define aes mapping based on the presence of y_var aes_mapping <- if (is.null(config$y_var)) { @@ -785,7 +816,7 @@ main <- function() { file = file.path(out_dir, "max_observed_L_vals_for_spots_outside_2sd_K.csv"), row.names = FALSE) # Each plots list corresponds to a file - message("Generating QC plot configurations") + message("Generating quality control plot configurations") l_vs_k_plots <- list( list( df = df, @@ -947,7 +978,7 @@ main <- function() { ) ) - message("Generating QC plots") + message("Generating quality control plots") generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots) generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots) generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots) @@ -1046,8 +1077,10 @@ main <- function() { write.csv(zscores_interactions, file = file.path(out_dir, "ZScores_Interaction.csv"), row.names = FALSE) # Create interaction plots + message("Generating interaction plot configurations") reference_plot_configs <- generate_interaction_plot_configs(df_reference, interaction_vars) deletion_plot_configs <- generate_interaction_plot_configs(df_deletion, interaction_vars) + message("Generating interaction plots") generate_and_save_plots(out_dir, "RF_interactionPlots", reference_plot_configs, grid_layout = list(ncol = 4, nrow = 3)) generate_and_save_plots(out_dir, "InteractionPlots", deletion_plot_configs, grid_layout = list(ncol = 4, nrow = 3))