diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R index 6ce5f18a..1c35e639 100644 --- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R +++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R @@ -634,6 +634,16 @@ generate_plate_analysis_plot_configs <- function(variables, stages = c("before", for (var in variables) { for (stage in stages) { df_plot <- if (stage == "before") df_before else df_after + + # Check for non-finite values in the y-variable + df_plot_filtered <- df_plot %>% + filter(is.finite(!!sym(var))) + + # Count removed rows + removed_rows <- nrow(df_plot) - nrow(df_plot_filtered) + if (removed_rows > 0) { + message(sprintf("Removed %d non-finite values for variable %s during stage %s", removed_rows, var, stage)) + } # Adjust settings based on plot_type if (plot_type == "scatter") { @@ -660,7 +670,7 @@ generate_plate_analysis_plot_configs <- function(variables, stages = c("before", return(plots) } -generate_interaction_plot_configs <- function(df, variables, limits_map = NULL) { +generate_interaction_plot_configs <- function(df, limits_map = NULL) { # Default limits_map if not provided if (is.null(limits_map)) { limits_map <- list( @@ -682,8 +692,8 @@ generate_interaction_plot_configs <- function(df, variables, limits_map = NULL) configs <- list() - for (variable in variables) { - y_range <- limits_map[[variable]] + for (var in names(limits_map)) { + y_range <- limits_map[[var]] # Calculate annotation positions y_min <- min(y_range) @@ -699,8 +709,8 @@ generate_interaction_plot_configs <- function(df, variables, limits_map = NULL) # Prepare linear model line lm_line <- list( - intercept = df_filtered[[paste0("lm_intercept_", variable)]], - slope = df_filtered[[paste0("lm_slope_", variable)]] + intercept = df_filtered[[paste0("lm_intercept_", var)]], + slope = df_filtered[[paste0("lm_slope_", var)]] ) # Calculate x-axis position for annotations @@ -710,8 +720,8 @@ generate_interaction_plot_configs <- function(df, variables, limits_map = NULL) # Generate annotations annotations <- lapply(names(annotation_positions), function(annotation_name) { label <- switch(annotation_name, - ZShift = paste("ZShift =", round(df_filtered[[paste0("Z_Shift_", variable)]], 2)), - lm_ZScore = paste("lm ZScore =", round(df_filtered[[paste0("Z_lm_", variable)]], 2)), + ZShift = paste("ZShift =", round(df_filtered[[paste0("Z_Shift_", var)]], 2)), + lm_ZScore = paste("lm ZScore =", round(df_filtered[[paste0("Z_lm_", var)]], 2)), NG = paste("NG =", df_filtered$NG), DB = paste("DB =", df_filtered$DB), SM = paste("SM =", df_filtered$SM), @@ -729,7 +739,7 @@ generate_interaction_plot_configs <- function(df, variables, limits_map = NULL) plot_settings <- list( df = df_filtered, x_var = "conc_num_factor", - y_var = variable, + y_var = var, ylim_vals = y_range, annotations = annotations, lm_line = lm_line, @@ -1023,7 +1033,6 @@ main <- function() { df = df, variables = summary_vars, group_vars = c("conc_num", "conc_num_factor"))$df_with_stats - message("Filtering non-finite data") message("Calculating summary statistics after quality control") ss <- calculate_summary_stats( @@ -1033,6 +1042,8 @@ main <- function() { df_na_ss <- ss$summary_stats df_na_stats <- ss$df_with_stats write.csv(df_na_ss, file = file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE) + # For plotting (ggplot warns on NAs) + df_na_stats_filtered <- df_na_stats %>% filter(across(all_of(summary_vars), is.finite)) df_na_stats <- df_na_stats %>% mutate( @@ -1153,20 +1164,20 @@ main <- function() { plate_analysis_plot_configs <- generate_plate_analysis_plot_configs( variables = summary_vars, df_before = df_stats, - df_after = df_na_stats, + df_after = df_na_stats_filtered ) plate_analysis_boxplot_configs <- generate_plate_analysis_plot_configs( variables = summary_vars, df_before = df_stats, - df_after = df_na_stats, + df_after = df_na_stats_filtered, plot_type = "box" ) plate_analysis_no_zeros_plot_configs <- generate_plate_analysis_plot_configs( variables = summary_vars, stages = c("after"), # Only after QC - df_after = df_no_zeros_stats, + df_after = df_no_zeros_stats ) plate_analysis_no_zeros_boxplot_configs <- generate_plate_analysis_plot_configs( @@ -1208,7 +1219,7 @@ main <- function() { # TODO trying out some parallelization # future::plan(future::multicore, workers = parallel::detectCores()) - future::plan(future::multisession, workers = 3) + future::plan(future::multisession, workers = 3) # generate 3 plots in parallel plot_configs <- list( list(out_dir = out_dir_qc, filename = "L_vs_K_before_quality_control", @@ -1318,11 +1329,11 @@ main <- function() { # Create interaction plots message("Generating reference interaction plots") - reference_plot_configs <- generate_interaction_plot_configs(zscore_interactions_reference_joined, interaction_vars) + reference_plot_configs <- generate_interaction_plot_configs(zscore_interactions_reference_joined) generate_and_save_plots(out_dir, "interaction_plots_reference", reference_plot_configs, grid_layout = list(ncol = 4, nrow = 3)) message("Generating deletion interaction plots") - deletion_plot_configs <- generate_interaction_plot_configs(zscore_interactions_joined, interaction_vars) + deletion_plot_configs <- generate_interaction_plot_configs(zscore_interactions_joined) generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs, grid_layout = list(ncol = 4, nrow = 3)) # Define conditions for enhancers and suppressors