Simplify plot config format

2024-09-30 01:26:36 -04:00
parent 5d25b3c3ef
commit 2f42357aa1
1 changed files with 301 additions and 409 deletions
--- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R
+++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R
@@ -112,7 +112,7 @@ theme_publication <- function(base_size = 14, base_family = "sans", legend_posit
      legend.spacing = unit(0, "cm"),
      legend.title = element_text(face = "italic", size = rel(1.3)),
      legend.text = element_text(size = rel(1.2)),
-      plot.margin = unit(c(10, 5, 5, 5), "mm"),
+      plot.margin = unit(c(10, 5, 5, 5), "mm")
      # strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
      # strip.text = element_text(face = "bold")
    )
@@ -150,9 +150,8 @@ load_and_filter_data <- function(easy_results_file, sd = 3) {
      SM = 0,
      OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep), # should these be hardcoded?
      conc_num = as.numeric(gsub("[^0-9\\.]", "", Conc)),
-      conc_num_factor_new = factor(conc_num),
-      conc_num_factor_zeroed = factor(as.numeric(conc_num_factor_new) - 1),
-      conc_num_factor = as.numeric(conc_num_factor_zeroed) # for legacy purposes
+      conc_num_factor = as.numeric(as.factor(conc_num)) - 1, # for legacy purposes
+      conc_num_factor_factor = factor(conc_num)
    )

  return(df)
@@ -186,12 +185,12 @@ calculate_summary_stats <- function(df, variables, group_vars) {
      N = n(),
      across(all_of(variables),
        list(
-          mean = ~mean(., na.rm = TRUE),
-          median = ~median(., na.rm = TRUE),
-          max = ~ifelse(all(is.na(.)), NA, max(., na.rm = TRUE)),
-          min = ~ifelse(all(is.na(.)), NA, min(., na.rm = TRUE)),
-          sd = ~sd(., na.rm = TRUE),
-          se = ~sd(., na.rm = TRUE) / sqrt(N - 1) # Bessel's correction
+          mean = ~ mean(.x, na.rm = TRUE),
+          median = ~ median(.x, na.rm = TRUE),
+          max = ~ ifelse(all(is.na(.x)), NA, max(.x, na.rm = TRUE)),
+          min = ~ ifelse(all(is.na(.x)), NA, min(.x, na.rm = TRUE)),
+          sd = ~ sd(.x, na.rm = TRUE),
+          se = ~ sd(.x, na.rm = TRUE) / sqrt(n() - 1)
        ),
        .names = "{.fn}_{.col}"
      ),
@@ -335,7 +334,8 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats,
  # Declare column order for output
  calculations <- calculations %>%
    select(
-      "OrfRep", "Gene", "num", "conc_num", "conc_num_factor", "N",
+      "OrfRep", "Gene", "num", "N",
+      "conc_num", "conc_num_factor", "conc_num_factor_factor",
      "mean_L", "mean_K", "mean_r", "mean_AUC",
      "median_L", "median_K", "median_r", "median_AUC",
      "sd_L", "sd_K", "sd_r", "sd_AUC",
@@ -352,7 +352,8 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats,

  interactions <- interactions %>%
    select(
-      "OrfRep", "Gene", "conc_num", "conc_num_factor", "num", "NG", "DB", "SM",
+      "OrfRep", "Gene", "num", "NG", "DB", "SM",
+      "conc_num", "conc_num_factor", "conc_num_factor_factor",
      "Raw_Shift_L", "Raw_Shift_K", "Raw_Shift_r", "Raw_Shift_AUC",
      "Z_Shift_L", "Z_Shift_K", "Z_Shift_r", "Z_Shift_AUC",
      "Sum_Z_Score_L", "Sum_Z_Score_K", "Sum_Z_Score_r", "Sum_Z_Score_AUC",
@@ -366,10 +367,11 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats,
  cleaned_df <- df %>%
    select(-any_of(
      setdiff(intersect(names(df), names(calculations)),
-      c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))))
+      c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor", "conc_num_factor_factor"))))

  # Join the original dataframe with calculations
-  df_with_calculations <- left_join(cleaned_df, calculations, by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))
+  df_with_calculations <- left_join(cleaned_df, calculations,
+    by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor", "conc_num_factor_factor"))

  # Remove overlapping columns between df_with_calculations and interactions
  df_with_calculations_clean <- df_with_calculations %>%
@@ -378,7 +380,8 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats,
      c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))))

  # Join with interactions to create the full dataset
-  full_data <- left_join(df_with_calculations_clean, interactions, by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))
+  full_data <- left_join(df_with_calculations_clean, interactions,
+    by = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor", "conc_num_factor_factor"))

  return(list(
    calculations = calculations,
@@ -390,131 +393,67 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats,
 generate_and_save_plots <- function(out_dir, filename, plot_configs) {
  message("Generating ", filename, ".pdf and ", filename, ".html")

-  for (config_group in plot_configs) {
-    plot_list <- config_group$plots
-    grid_nrow <- config_group$grid_layout$nrow
-    grid_ncol <- config_group$grid_layout$ncol
+  grid_nrow <- ifelse(is.null(plot_configs$grid_layout$nrow), 1, plot_configs$grid_layout$nrow)
+  grid_ncol <- ifelse(is.null(plot_configs$grid_layout$ncol), length(plot_configs$plots), plot_configs$grid_layout$ncol)

-    # Set defaults if nrow or ncol are not provided
-    if (is.null(grid_nrow) || is.null(grid_ncol)) {
-      num_plots <- length(plot_list)
-      grid_nrow <- ifelse(is.null(grid_nrow), 1, grid_nrow)
-      grid_ncol <- ifelse(is.null(grid_ncol), num_plots, grid_ncol)
-    }
-
-    # Prepare lists to collect static and interactive plots
  static_plots <- list()
  plotly_plots <- list()

-    # Generate each individual plot based on the configuration
-    for (i in seq_along(plot_list)) {
-      config <- plot_list[[i]]
+  for (i in seq_along(plot_configs$plots)) {
+    config <- plot_configs$plots[[i]]
    df <- config$df

-      # Create the base plot
+    # Define aes_mapping and handle color_var defaulting
    aes_mapping <- if (config$plot_type == "bar") {
-        if (!is.null(config$color_var)) {
-          aes(x = .data[[config$x_var]], fill = as.factor(.data[[config$color_var]]), color = as.factor(.data[[config$color_var]]))
-        } else {
-          aes(x = .data[[config$x_var]])
-        }
+      aes(x = .data[[config$x_var]], fill = ifelse(!is.null(config$color_var), .data[[config$color_var]], "black"),
+        color = ifelse(!is.null(config$color_var), .data[[config$color_var]], "black"))
    } else if (config$plot_type == "density") {
-        if (!is.null(config$color_var)) {
-          aes(x = .data[[config$x_var]], color = as.factor(.data[[config$color_var]]))
+      aes(x = .data[[config$x_var]], color = ifelse(!is.null(config$color_var), .data[[config$color_var]], "black"))
    } else {
-          aes(x = .data[[config$x_var]])
-        }
-      } else {
-        if (!is.null(config$color_var)) {
-          aes(x = .data[[config$x_var]], y = .data[[config$y_var]], color = as.factor(.data[[config$color_var]]))
-        } else {
-          aes(x = .data[[config$x_var]], y = .data[[config$y_var]])
-        }
+      aes(x = .data[[config$x_var]], y = .data[[config$y_var]], color = ifelse(!is.null(config$color_var), .data[[config$color_var]], "black"))
    }

-      plot <- ggplot(df, aes_mapping)
+    # Apply theme_publication with legend_position
+    plot <- ggplot(df, aes_mapping) + theme_publication(legend_position = config$legend_position)

-      # Apply theme_publication with legend_position from config
-      legend_position <- if (!is.null(config$legend_position)) config$legend_position else "bottom"
-      plot <- plot + theme_publication(legend_position = legend_position)
-
-      # Use appropriate helper function based on plot type
+    # Apply appropriate plot function
    plot <- switch(config$plot_type,
      "scatter" = generate_scatter_plot(plot, config),
      "box" = generate_box_plot(plot, config),
      "density" = plot + geom_density(),
      "bar" = plot + geom_bar(),
-        plot  # default case if no type matches
+      plot  # default case
    )

-      # Add title and labels
-      if (!is.null(config$title)) {
-        plot <- plot + ggtitle(config$title)
-      }
-      if (!is.null(config$x_label)) {
-        plot <- plot + xlab(config$x_label)
-      }
-      if (!is.null(config$y_label)) {
-        plot <- plot + ylab(config$y_label)
-      }
-
-      # Add cartesian coordinates if specified
-      if (!is.null(config$coord_cartesian)) {
-        plot <- plot + coord_cartesian(ylim = config$coord_cartesian)
-      }
-
-      # Apply scale_color_discrete(guide = FALSE) when color_var is NULL
-      if (is.null(config$color_var)) {
-        plot <- plot + scale_color_discrete(guide = "none")
-      }
+    # Add titles and labels
+    if (!is.null(config$title)) plot <- plot + ggtitle(config$title)
+    if (!is.null(config$x_label)) plot <- plot + xlab(config$x_label)
+    if (!is.null(config$y_label)) plot <- plot + ylab(config$y_label)
+    if (!is.null(config$coord_cartesian)) plot <- plot + coord_cartesian(ylim = config$coord_cartesian)

    # Add interactive tooltips for plotly
-      tooltip_vars <- c()
-      if (config$plot_type == "scatter") {
-        if (!is.null(config$delta_bg_point) && config$delta_bg_point) {
-          tooltip_vars <- c(tooltip_vars, "OrfRep", "Gene", "delta_bg")
-        } else if (!is.null(config$gene_point) && config$gene_point) {
-          tooltip_vars <- c(tooltip_vars, "OrfRep", "Gene")
-        } else if (!is.null(config$y_var) && !is.null(config$x_var)) {
-          tooltip_vars <- c(config$x_var, config$y_var)
-        }
-      }
+    tooltip_vars <- ifelse(!is.null(config$tooltip_vars), config$tooltip_vars, "none")
+    plotly_plot <- suppressWarnings(plotly::ggplotly(plot, tooltip = tooltip_vars))

-      # Convert to plotly object and suppress warnings here
-      plotly_plot <- suppressWarnings({
-        if (length(tooltip_vars) > 0) {
-          plotly::ggplotly(plot, tooltip = tooltip_vars)
-        } else {
-          plotly::ggplotly(plot, tooltip = "none")
-        }
-      })
-
-      # Adjust legend position if specified
+    # Adjust legend position in plotly
    if (!is.null(config$legend_position) && config$legend_position == "bottom") {
      plotly_plot <- plotly_plot %>% layout(legend = list(orientation = "h"))
    }

-      # Add plots to lists
+    # Add static and interactive plots to lists
    static_plots[[i]] <- plot
    plotly_plots[[i]] <- plotly_plot
  }

-    # Save static PDF plot(s) for the current grid
+  # Save static PDF plots
  pdf(file.path(out_dir, paste0(filename, ".pdf")), width = 16, height = 9)
  grid.arrange(grobs = static_plots, ncol = grid_ncol, nrow = grid_nrow)
  dev.off()

-    combined_plot <- plotly::subplot(
-      plotly_plots,
-      nrows = grid_nrow,
-      ncols = grid_ncol,
-      margin = 0.05
-    )
-
-    # Save combined HTML plot(s)
+  # Save combined interactive HTML plots
+  combined_plot <- plotly::subplot(plotly_plots, nrows = grid_nrow, ncols = grid_ncol, margin = 0.05)
  html_file <- file.path(out_dir, paste0(filename, ".html"))
  saveWidget(combined_plot, file = html_file, selfcontained = TRUE)
-  }
 }

 generate_scatter_plot <- function(plot, config) {
@@ -671,51 +610,39 @@ generate_box_plot <- function(plot, config) {
  return(plot)
 }

-generate_plate_analysis_plot_configs <- function(variables, stages = c("before", "after"),
-  df_before = NULL, df_after = NULL, plot_type = "scatter") {
+generate_plate_analysis_plot_configs <- function(variables, df_before = NULL, df_after = NULL,
+  plot_type = "scatter", stages = c("before", "after")) {
  plots <- list()
+  
  for (var in variables) {
    for (stage in stages) {
      df_plot <- if (stage == "before") df_before else df_after

      # Check for non-finite values in the y-variable
-      df_plot_filtered <- df_plot %>%
-        filter(is.finite(!!sym(var)))
-      
-      # Count removed rows
-      removed_rows <- nrow(df_plot) - nrow(df_plot_filtered)
-      if (removed_rows > 0) {
-        message(sprintf("Removed %d non-finite values for variable %s during stage %s", removed_rows, var, stage))
-      }
+      df_plot_filtered <- df_plot %>% filter(is.finite(!!sym(var)))

      # Adjust settings based on plot_type
-      if (plot_type == "scatter") {
-        error_bar <- TRUE
-        position <- "jitter"
-      } else if (plot_type == "box") {
-        error_bar <- FALSE
-        position <- NULL
-      }
-      
      config <- list(
-        df = df_plot,
+        df = df_plot_filtered,
        x_var = "scan",
        y_var = var,
        plot_type = plot_type,
        title = paste("Plate analysis by Drug Conc for", var, stage, "quality control"),
-        error_bar = error_bar,
-        color_var = "conc_num_factor_zeroed",
-        position = position,
-        size = 0.2
+        color_var = "conc_num_factor_factor",
+        position = if (plot_type == "scatter") "jitter" else NULL,
+        size = 0.2,
+        error_bar = (plot_type == "scatter")
      )
+
+      # Add config to plots list
      plots <- append(plots, list(config))
    }
  }
-  return(plots)
+
+  return(list(grid_layout = list(ncol = 1, nrow = length(plots)), plots = plots))
 }

 generate_interaction_plot_configs <- function(df, limits_map = NULL, plot_type = "reference") {
-  # Define limits if not provided
  if (is.null(limits_map)) {
    limits_map <- list(
      L = c(0, 130),
@@ -729,94 +656,74 @@ generate_interaction_plot_configs <- function(df, limits_map = NULL, plot_type =
    )
  }

-  # Define grouping variables and filter data based on plot type
-  if (plot_type == "reference") {
-    group_vars <- c("OrfRep", "Gene", "num")
+  group_vars <- if (plot_type == "reference") c("OrfRep", "Gene", "num") else c("OrfRep", "Gene")
  df_filtered <- df %>%
-      mutate(
-        OrfRepCombined = paste(OrfRep, Gene, num, sep = "_")
-      )
-  } else if (plot_type == "deletion") {
-    group_vars <- c("OrfRep", "Gene")
-    df_filtered <- df %>%
-      mutate(
-        OrfRepCombined = paste(OrfRep, Gene, sep = "_") # Compare by OrfRep and Gene for deletion
-      )
-  }
+    mutate(OrfRepCombined = if (plot_type == "reference") paste(OrfRep, Gene, num, sep = "_") else paste(OrfRep, Gene, sep = "_"))

-  # Create a list to store all configs
-  configs <- list()
+  plots <- list()

-  # Generate the first 8 scatter/box plots for L, K, r, AUC (shared between reference and deletion)
-  overall_vars <- c("L", "K", "r", "AUC")
-  for (var in overall_vars) {
+  # Generate plots for overall variables (L, K, r, AUC)
+  for (var in c("L", "K", "r", "AUC")) {
    y_limits <- limits_map[[var]]

    config <- list(
      df = df_filtered,
      plot_type = "scatter",
-      x_var = "conc_num_factor_new",
+      x_var = "conc_num_factor_factor",
      y_var = var,
      x_label = unique(df_filtered$Drug)[1],
      title = sprintf("Scatter RF for %s with SD", var),
      coord_cartesian = y_limits,
      error_bar = TRUE,
-      x_breaks = unique(df_filtered$conc_num_factor_new),
+      x_breaks = unique(df_filtered$conc_num_factor_factor),
      x_labels = as.character(unique(df_filtered$conc_num)),
      grid_layout = list(ncol = 2, nrow = 2),
-      position = "jitter"
+      position = "jitter",
+      smooth = TRUE
    )
-    configs <- append(configs, list(config))
+    plots <- append(plots, list(config))
  }

-  # Generate Delta comparison plots (4x3 grid for deletion and reference)
+  # Generate Delta comparison plots
  unique_groups <- df_filtered %>% select(all_of(group_vars)) %>% distinct()

  for (i in seq_len(nrow(unique_groups))) {
    group <- unique_groups[i, ]
-    group_data <- df_filtered %>% filter(across(all_of(group_vars), ~ . == group[[cur_column()]]))
+    group_data <- df_filtered %>% semi_join(group, by = group_vars)

    OrfRep <- as.character(group$OrfRep)
    Gene <- if ("Gene" %in% names(group)) as.character(group$Gene) else ""
    num <- if ("num" %in% names(group)) as.character(group$num) else ""
-    OrfRepCombined <- paste(OrfRep, Gene, num, sep = "_")

-    # Generate plots for Delta variables
-    delta_vars <- c("Delta_L", "Delta_K", "Delta_r", "Delta_AUC")
-    for (var in delta_vars) {
+    for (var in c("Delta_L", "Delta_K", "Delta_r", "Delta_AUC")) {
      y_limits <- limits_map[[var]]
-      upper_y <- y_limits[2]
-      lower_y <- y_limits[1]
-      y_span <- upper_y - lower_y
+      y_span <- y_limits[2] - y_limits[1]
      
-      # Get WT_sd_var for error bar calculations
+      # Error bars
      WT_sd_var <- paste0("WT_sd_", sub("Delta_", "", var))
      WT_sd_value <- group_data[[WT_sd_var]][1]
      error_bar_ymin <- 0 - (2 * WT_sd_value)
      error_bar_ymax <- 0 + (2 * WT_sd_value)
      
-      # Set annotations (Z_Shifts, lm Z-Scores, NG, DB, SM values)
-      Z_Shift_var <- paste0("Z_Shift_", sub("Delta_", "", var))
-      Z_lm_var <- paste0("Z_lm_", sub("Delta_", "", var))
-      Z_Shift_value <- round(group_data[[Z_Shift_var]][1], 2)
-      Z_lm_value <- round(group_data[[Z_lm_var]][1], 2)
+      # Annotations
+      Z_Shift_value <- round(group_data[[paste0("Z_Shift_", sub("Delta_", "", var))]][1], 2)
+      Z_lm_value <- round(group_data[[paste0("Z_lm_", sub("Delta_", "", var))]][1], 2)
      NG_value <- group_data$NG[1]
      DB_value <- group_data$DB[1]
      SM_value <- group_data$SM[1]

      annotations <- list(
-        list(x = 1, y = upper_y - 0.2 * y_span, label = paste("ZShift =", Z_Shift_value)),
-        list(x = 1, y = upper_y - 0.3 * y_span, label = paste("lm ZScore =", Z_lm_value)),
-        list(x = 1, y = lower_y + 0.2 * y_span, label = paste("NG =", NG_value)),
-        list(x = 1, y = lower_y + 0.1 * y_span, label = paste("DB =", DB_value)),
-        list(x = 1, y = lower_y, label = paste("SM =", SM_value))
+        list(x = 1, y = y_limits[2] - 0.2 * y_span, label = paste("ZShift =", Z_Shift_value)),
+        list(x = 1, y = y_limits[2] - 0.3 * y_span, label = paste("lm ZScore =", Z_lm_value)),
+        list(x = 1, y = y_limits[1] + 0.2 * y_span, label = paste("NG =", NG_value)),
+        list(x = 1, y = y_limits[1] + 0.1 * y_span, label = paste("DB =", DB_value)),
+        list(x = 1, y = y_limits[1], label = paste("SM =", SM_value))
      )

-      # Create configuration for each Delta plot
      config <- list(
        df = group_data,
        plot_type = "scatter",
-        x_var = "conc_num",
+        x_var = "conc_num_factor_factor",
        y_var = var,
        x_label = unique(group_data$Drug)[1],
        title = paste(OrfRep, Gene, sep = "      "),
@@ -827,23 +734,21 @@ generate_interaction_plot_configs <- function(df, limits_map = NULL, plot_type =
          ymin = error_bar_ymin,
          ymax = error_bar_ymax
        ),
-        lm_smooth = TRUE,
-        x_breaks = unique(group_data$conc_num_factor_new),
+        smooth = TRUE,
+        x_breaks = unique(group_data$conc_num_factor_factor),
        x_labels = as.character(unique(group_data$conc_num)),
        ylim_vals = y_limits,
-        grid_layout = list(ncol = 4, nrow = 3) # Adjust grid layout for gene-gene comparisons
+        grid_layout = list(ncol = 4, nrow = 3)
      )
-      configs <- append(configs, list(config))
+      plots <- append(plots, list(config))
    }
  }

-  return(configs)
+  return(list(grid_layout = list(ncol = 2, nrow = 2), plots = plots))
 }

 generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FALSE, overlap_color = FALSE) {
-  
  sd_bands <- c(1, 2, 3)
-
  avg_zscore_cols <- paste0("Avg_Zscore_", variables)
  z_lm_cols <- paste0("Z_lm_", variables)
  rank_avg_zscore_cols <- paste0("Rank_", variables)
@@ -851,8 +756,8 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA

  configs <- list()

+  # Adjust values if necessary
  if (adjust) {
-    message("Replacing NA with 0.001 for Avg_Zscore_ and Z_lm_ columns for ranks")
    df <- df %>%
      mutate(
        across(all_of(avg_zscore_cols), ~ifelse(is.na(.), 0.001, .)),
@@ -860,34 +765,22 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA
      )
  }

-  message("Calculating ranks for Avg_Zscore and Z_lm columns")
-  rank_col_mapping <- setNames(rank_avg_zscore_cols, avg_zscore_cols)
+  # Calculate rank columns for Avg_Zscore and Z_lm columns
  df_ranked <- df %>%
-    mutate(across(all_of(avg_zscore_cols), ~rank(., na.last = "keep"), .names = "{rank_col_mapping[.col]}"))
+    mutate(across(all_of(avg_zscore_cols), ~rank(., na.last = "keep"), .names = paste0("Rank_", avg_zscore_cols))) %>%
+    mutate(across(all_of(z_lm_cols), ~rank(., na.last = "keep"), .names = paste0("Rank_lm_", z_lm_cols)))

-  rank_lm_col_mapping <- setNames(rank_z_lm_cols, z_lm_cols)
-  df_ranked <- df_ranked %>%
-    mutate(across(all_of(z_lm_cols), ~rank(., na.last = "keep"), .names = "{rank_lm_col_mapping[.col]}"))
-  
-  # SD-based plots for L and K
+  # Generate plots for SD-based L and K variables
  for (variable in c("L", "K")) {
-    
-    if (is_lm) {
-      rank_var <- paste0("Rank_lm_", variable)
-      zscore_var <- paste0("Z_lm_", variable)
-      y_label <- paste("Int Z score", variable)
-    } else {
-      rank_var <- paste0("Rank_", variable)
-      zscore_var <- paste0("Avg_Zscore_", variable)
-      y_label <- paste("Avg Z score", variable)
-    }
+    rank_var <- if (is_lm) paste0("Rank_lm_", variable) else paste0("Rank_", variable)
+    zscore_var <- if (is_lm) paste0("Z_lm_", variable) else paste0("Avg_Zscore_", variable)
+    y_label <- if (is_lm) paste("Int Z score", variable) else paste("Avg Z score", variable)

    for (sd_band in sd_bands) {
-      
      num_enhancers <- sum(df_ranked[[zscore_var]] >= sd_band, na.rm = TRUE)
      num_suppressors <- sum(df_ranked[[zscore_var]] <= -sd_band, na.rm = TRUE)

-      # Annotated plot configuration
+      # Plot with annotations
      configs[[length(configs) + 1]] <- list(
        df = df_ranked,
        x_var = rank_var,
@@ -903,27 +796,22 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA
          list(
            x = median(df_ranked[[rank_var]], na.rm = TRUE),
            y = max(df_ranked[[zscore_var]], na.rm = TRUE) * 0.9,
-            label = paste("Deletion Enhancers =", num_enhancers),
-            hjust = 0.5,
-            vjust = 1
+            label = paste("Deletion Enhancers =", num_enhancers)
          ),
          list(
            x = median(df_ranked[[rank_var]], na.rm = TRUE),
            y = min(df_ranked[[zscore_var]], na.rm = TRUE) * 0.9,
-            label = paste("Deletion Suppressors =", num_suppressors),
-            hjust = 0.5,
-            vjust = 0
+            label = paste("Deletion Suppressors =", num_suppressors)
          )
        ),
        shape = 3,
        size = 0.1,
        y_label = y_label,
        x_label = "Rank",
-        legend_position = "none",
-        grid_layout = list(ncol = 3, nrow = 2)
+        legend_position = "none"
      )

-      # Non-Annotated Plot Configuration
+      # Plot without annotations
      configs[[length(configs) + 1]] <- list(
        df = df_ranked,
        x_var = rank_var,
@@ -940,42 +828,26 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA
        size = 0.1,
        y_label = y_label,
        x_label = "Rank",
-        legend_position = "none",
-        grid_layout = list(ncol = 3, nrow = 2)
+        legend_position = "none"
      )
    }
  }

-  # Avg ZScore and Rank Avg ZScore Plots for variables
+  # Generate Avg ZScore and Rank Avg ZScore plots for each variable
  for (variable in variables) {
    for (plot_type in c("Avg Zscore vs lm", "Rank Avg Zscore vs lm")) {
-
      title <- paste(plot_type, variable)

      # Define specific variables based on plot type
-      if (plot_type == "Avg Zscore vs lm") {
-        x_var <- paste0("Avg_Zscore_", variable)
-        y_var <- paste0("Z_lm_", variable)
-        rectangles <- list(
-          list(xmin = -2, xmax = 2, ymin = -2, ymax = 2,
-            fill = NA, color = "grey20", alpha = 0.1
-          )
-        )
-      } else if (plot_type == "Rank Avg Zscore vs lm") {
-        x_var <- paste0("Rank_", variable)
-        y_var <- paste0("Rank_lm_", variable)
-        rectangles <- NULL
-      }
+      x_var <- if (plot_type == "Avg Zscore vs lm") paste0("Avg_Zscore_", variable) else paste0("Rank_", variable)
+      y_var <- if (plot_type == "Avg Zscore vs lm") paste0("Z_lm_", variable) else paste0("Rank_lm_", variable)

      # Fit the linear model
      lm_model <- lm(as.formula(paste(y_var, "~", x_var)), data = df_ranked)
-      
-      # Extract intercept, slope, and R-squared from the model coefficients
      intercept <- coef(lm_model)[1]
      slope <- coef(lm_model)[2]
      r_squared <- summary(lm_model)$r.squared

-      # Annotations: include R-squared in the plot
      annotations <- list(
        list(
          x = mean(range(df_ranked[[x_var]], na.rm = TRUE)),
@@ -987,6 +859,12 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA
        )
      )

+      rectangles <- if (plot_type == "Avg Zscore vs lm") {
+        list(list(xmin = -2, xmax = 2, ymin = -2, ymax = 2, fill = NA, color = "grey20", alpha = 0.1))
+      } else {
+        NULL
+      }
+
      configs[[length(configs) + 1]] <- list(
        df = df_ranked,
        x_var = x_var,
@@ -1007,11 +885,11 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA
      )
    }
  }
-  return(configs)
+
+  return(list(grid_layout = list(ncol = 3, nrow = 2), plots = configs))
 }

 generate_correlation_plot_configs <- function(df, highlight_cyan = FALSE) {
-  # Define relationships for plotting
  relationships <- list(
    list(x = "Z_lm_L", y = "Z_lm_K", label = "Interaction L vs. Interaction K"),
    list(x = "Z_lm_L", y = "Z_lm_r", label = "Interaction L vs. Interaction r"),
@@ -1021,56 +899,36 @@ generate_correlation_plot_configs <- function(df, highlight_cyan = FALSE) {
    list(x = "Z_lm_r", y = "Z_lm_AUC", label = "Interaction r vs. Interaction AUC")
  )

-  configs <- list()
+  plots <- list()

  for (rel in relationships) {
-    # Fit linear model
    lm_model <- lm(as.formula(paste(rel$y, "~", rel$x)), data = df)
-    lm_summary <- summary(lm_model)
-    intercept <- coef(lm_model)[1]
-    slope <- coef(lm_model)[2]
-    r_squared <- lm_summary$r.squared
+    r_squared <- summary(lm_model)$r.squared

-    # Construct plot configuration
    config <- list(
      df = df,
      x_var = rel$x,
      y_var = rel$y,
      plot_type = "scatter",
      title = rel$label,
-      x_label = paste("z-score", gsub("Z_lm_", "", rel$x)),
-      y_label = paste("z-score", gsub("Z_lm_", "", rel$y)),
      annotations = list(
-        list(
-          x = mean(range(df[[rel$x]], na.rm = TRUE)),
-          y = mean(range(df[[rel$y]], na.rm = TRUE)),
-          label = paste("R-squared =", round(r_squared, 3)),
-          hjust = 0.5,
-          vjust = 1,
-          size = 5,
-          color = "black"
-        )
+        list(x = mean(df[[rel$x]], na.rm = TRUE),
+             y = mean(df[[rel$y]], na.rm = TRUE),
+             label = paste("R-squared =", round(r_squared, 3)))
      ),
      smooth = TRUE,
      smooth_color = "tomato3",
-      lm_line = list(intercept = intercept, slope = slope),
-      legend_position = "right",
+      lm_line = list(intercept = coef(lm_model)[1], slope = coef(lm_model)[2]),
      shape = 3,
      size = 0.5,
      color_var = "Overlap",
-      rectangles = list(
-        list(
-          xmin = -2, xmax = 2, ymin = -2, ymax = 2,
-          fill = NA, color = "grey20", alpha = 0.1
-        )
-      ),
-      cyan_points = highlight_cyan,  # Toggle cyan point highlighting
+      cyan_points = highlight_cyan
    )

-    configs[[length(configs) + 1]] <- config
+    plots <- append(plots, list(config))
  }

-  return(configs)
+  return(list(grid_layout = list(ncol = 3, nrow = 2), plots = plots))
 }

 main <- function() {
@@ -1085,8 +943,6 @@ main <- function() {

    summary_vars <- c("L", "K", "r", "AUC", "delta_bg") # fields to filter and calculate summary stats across
    interaction_vars <- c("L", "K", "r", "AUC") # fields to calculate interaction z-scores
-    print_vars <- c("OrfRep", "Plate", "scan", "Col", "Row", "num", "OrfRep", "conc_num", "conc_num_factor",
-      "delta_bg_tolerance", "delta_bg", "Gene", "L", "K", "r", "AUC", "NG", "DB")
    
    message("Loading and filtering data for experiment: ", exp_name)
    df <- load_and_filter_data(args$easy_results_file, sd = exp_sd) %>%
@@ -1099,7 +955,7 @@ main <- function() {
    df_no_zeros <- df_na %>% filter(L > 0) # formerly X_noZero
    
    # Save some constants
-    max_conc <- max(df$conc_num_factor_zeroed_num)
+    max_conc <- max(df$conc_num_factor)
    l_half_median <- (median(df_above_tolerance$L, na.rm = TRUE)) / 2
    k_half_median <- (median(df_above_tolerance$K, na.rm = TRUE)) / 2

@@ -1107,13 +963,13 @@ main <- function() {
    df_stats <- calculate_summary_stats(
      df = df,
      variables = summary_vars,
-      group_vars = c("conc_num", "conc_num_factor"))$df_with_stats
+      group_vars = c("conc_num", "conc_num_factor", "conc_num_factor_factor"))$df_with_stats

    message("Calculating summary statistics after quality control")
    ss <- calculate_summary_stats(
      df = df_na,
      variables = summary_vars,
-      group_vars = c("conc_num", "conc_num_factor"))
+      group_vars = c("conc_num", "conc_num_factor", "conc_num_factor_factor"))
    df_na_ss <- ss$summary_stats
    df_na_stats <- ss$df_with_stats
    write.csv(df_na_ss, file = file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
@@ -1150,7 +1006,7 @@ main <- function() {
    df_no_zeros_stats <- calculate_summary_stats(
      df = df_no_zeros,
      variables = summary_vars,
-      group_vars = c("conc_num", "conc_num_factor")
+      group_vars = c("conc_num", "conc_num_factor", "conc_num_factor_factor")
    )$df_with_stats

    message("Filtering by 2SD of K")
@@ -1161,13 +1017,13 @@ main <- function() {

    message("Calculating summary statistics for L within 2SD of K")
    # TODO We're omitting the original z_max calculation, not sure if needed?
-    ss <- calculate_summary_stats(df_na_within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))$summary_stats
+    ss <- calculate_summary_stats(df_na_within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor", "conc_num_factor_factor"))$summary_stats
    write.csv(ss,
      file = file.path(out_dir_qc, "max_observed_L_vals_for_spots_within_2sd_K.csv"),
      row.names = FALSE)
    
    message("Calculating summary statistics for L outside 2SD of K")
-    ss <- calculate_summary_stats(df_na_outside_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
+    ss <- calculate_summary_stats(df_na_outside_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor", "conc_num_factor_factor"))
    df_na_l_outside_2sd_k_stats <- ss$df_with_stats
    write.csv(ss$summary_stats,
      file = file.path(out_dir, "max_observed_L_vals_for_spots_outside_2sd_K.csv"),
@@ -1175,54 +1031,64 @@ main <- function() {
    
    # Each plots list corresponds to a file
    l_vs_k_plot_configs <- list(
+      grid_layout = list(ncol = 1, nrow = 1),
+      plots = list(
        list(
          df = df,
          x_var = "L",
          y_var = "K",
          plot_type = "scatter",
-        delta_bg_point = TRUE,
+          tooltip_vars = c("OrfRep", "Gene", "delta_bg"),
          title = "Raw L vs K before quality control",
-        color_var = "conc_num_factor_zeroed",
+          color_var = "conc_num_factor_factor",
          error_bar = FALSE,
          legend_position = "right"
        )
      )
+    )

    frequency_delta_bg_plot_configs <- list(
+      grid_layout = list(ncol = 1, nrow = 1),
+      plots = list(
        list(
          df = df_stats,
          x_var = "delta_bg",
          y_var = NULL,
          plot_type = "density",
          title = "Density plot for Delta Background by [Drug] (All Data)",
-        color_var = "conc_num_factor_zeroed",
+          color_var = "conc_num_factor_factor",
          x_label = "Delta Background",
          y_label = "Density",
          error_bar = FALSE,
-        legend_position = "right"),
+          legend_position = "right"
+        ),
        list(
          df = df_stats,
          x_var = "delta_bg",
          y_var = NULL,
          plot_type = "bar",
          title = "Bar plot for Delta Background by [Drug] (All Data)",
-        color_var = "conc_num_factor_zeroed",
+          color_var = "conc_num_factor_factor",
          x_label = "Delta Background",
          y_label = "Count",
          error_bar = FALSE,
-        legend_position = "right")
+          legend_position = "right"
+        )
+      )
    )

    above_threshold_plot_configs <- list(
+      grid_layout = list(ncol = 1, nrow = 1),
+      plots = list(
        list(
          df = df_above_tolerance,
          x_var = "L",
          y_var = "K",
          plot_type = "scatter",
-        delta_bg_point = TRUE,
+          tooltip_vars = c("OrfRep", "Gene", "delta_bg"),
          title = paste("Raw L vs K for strains above Delta Background threshold of",
            round(df_above_tolerance$delta_bg_tolerance[[1]], 3), "or above"),
-        color_var = "conc_num_factor_zeroed",
+          color_var = "conc_num_factor_factor",
          position = "jitter",
          annotations = list(
            list(
@@ -1235,6 +1101,7 @@ main <- function() {
          legend_position = "right"
        )
      )
+    )

    plate_analysis_plot_configs <- generate_plate_analysis_plot_configs(
      variables = summary_vars,
@@ -1263,59 +1130,84 @@ main <- function() {
    )

    l_outside_2sd_k_plot_configs <- list(
+      grid_layout = list(ncol = 1, nrow = 1),  # Ensures it's compatible with generate_and_save_plots
+      plots = list(
        list(
          df = df_na_l_outside_2sd_k_stats,
          x_var = "L",
          y_var = "K",
          plot_type = "scatter",
-        delta_bg_point = TRUE,
          title = "Raw L vs K for strains falling outside 2SD of the K mean at each Conc",
-        color_var = "conc_num_factor_zeroed",
-        position = "jitter",
+          color_var = "conc_num_factor_factor",
+          position = "jitter",  # Apply jitter for better visibility
+          tooltip_vars = c("OrfRep", "Gene", "delta_bg"),
+          annotations = list(
+            list(
+              x = mean(df_na_l_outside_2sd_k_stats$L, na.rm = TRUE),
+              y = mean(df_na_l_outside_2sd_k_stats$K, na.rm = TRUE),
+              label = paste("Total strains:", nrow(df_na_l_outside_2sd_k_stats))
+            )
+          ),
+          error_bar = FALSE,  # No error bars for this plot
          legend_position = "right"
        )
      )
+    )

    delta_bg_outside_2sd_k_plot_configs <- list(
+      grid_layout = list(ncol = 1, nrow = 1),  # Ensures it's compatible with generate_and_save_plots
+      plots = list(
        list(
          df = df_na_l_outside_2sd_k_stats,
          x_var = "delta_bg",
          y_var = "K",
          plot_type = "scatter",
-        gene_point = TRUE,
          title = "Delta Background vs K for strains falling outside 2SD of the K mean at each Conc",
-        color_var = "conc_num_factor_zeroed",
-        position = "jitter",
+          color_var = "conc_num_factor_factor",
+          position = "jitter",  # Apply jitter for better visibility
+          tooltip_vars = c("OrfRep", "Gene", "delta_bg"),
+          annotations = list(
+            list(
+              x = mean(df_na_l_outside_2sd_k_stats$delta_bg, na.rm = TRUE),
+              y = mean(df_na_l_outside_2sd_k_stats$K, na.rm = TRUE),
+              label = paste("Total strains:", nrow(df_na_l_outside_2sd_k_stats))
+            )
+          ),
+          error_bar = FALSE,  # No error bars for this plot
          legend_position = "right"
        )
      )
+    )

    message("Generating quality control plots in parallel")
-    # future::plan(future::multicore, workers = parallel::detectCores())
-    future::plan(future::multisession, workers = 3) # generate 3 plots in parallel

-    plot_configs <- list(
-      list(out_dir = out_dir_qc, filename = "L_vs_K_before_quality_control",
-        plot_configs = l_vs_k_plot_configs),
-      list(out_dir = out_dir_qc, filename = "frequency_delta_background",
-        plot_configs = frequency_delta_bg_plot_configs),
-      list(out_dir = out_dir_qc, filename = "L_vs_K_above_threshold",
-        plot_configs = above_threshold_plot_configs),
-      list(out_dir = out_dir_qc, filename = "plate_analysis",
-        plot_configs = plate_analysis_plot_configs),
-      list(out_dir = out_dir_qc, filename = "plate_analysis_boxplots",
-        plot_configs = plate_analysis_boxplot_configs),
-      list(out_dir = out_dir_qc, filename = "plate_analysis_no_zeros",
-        plot_configs = plate_analysis_no_zeros_plot_configs),
-      list(out_dir = out_dir_qc, filename = "plate_analysis_no_zeros_boxplots",
-        plot_configs = plate_analysis_no_zeros_boxplot_configs),
-      list(out_dir = out_dir_qc, filename = "L_vs_K_for_strains_2SD_outside_mean_K",
-        plot_configs = l_outside_2sd_k_plot_configs),
-      list(out_dir = out_dir_qc, filename = "delta_background_vs_K_for_strains_2sd_outside_mean_K",
-        plot_configs = delta_bg_outside_2sd_k_plot_configs)
-    )
+    generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plot_configs)
+    quit()
+    # # future::plan(future::multicore, workers = parallel::detectCores())
+    # future::plan(future::multisession, workers = 3) # generate 3 plots in parallel

-    # Generating quality control plots in parallel
+    # plot_configs <- list(
+    #   list(out_dir = out_dir_qc, filename = "L_vs_K_before_quality_control",
+    #     plot_configs = l_vs_k_plot_configs),
+    #   list(out_dir = out_dir_qc, filename = "frequency_delta_background",
+    #     plot_configs = frequency_delta_bg_plot_configs),
+    #   list(out_dir = out_dir_qc, filename = "L_vs_K_above_threshold",
+    #     plot_configs = above_threshold_plot_configs),
+    #   list(out_dir = out_dir_qc, filename = "plate_analysis",
+    #     plot_configs = plate_analysis_plot_configs),
+    #   list(out_dir = out_dir_qc, filename = "plate_analysis_boxplots",
+    #     plot_configs = plate_analysis_boxplot_configs),
+    #   list(out_dir = out_dir_qc, filename = "plate_analysis_no_zeros",
+    #     plot_configs = plate_analysis_no_zeros_plot_configs),
+    #   list(out_dir = out_dir_qc, filename = "plate_analysis_no_zeros_boxplots",
+    #     plot_configs = plate_analysis_no_zeros_boxplot_configs),
+    #   list(out_dir = out_dir_qc, filename = "L_vs_K_for_strains_2SD_outside_mean_K",
+    #     plot_configs = l_outside_2sd_k_plot_configs),
+    #   list(out_dir = out_dir_qc, filename = "delta_background_vs_K_for_strains_2sd_outside_mean_K",
+    #     plot_configs = delta_bg_outside_2sd_k_plot_configs)
+    # )
+
+    # # Generating quality control plots in parallel
    # furrr::future_map(plot_configs, function(config) {
    #   generate_and_save_plots(config$out_dir, config$filename, config$plot_configs)
    # }, .options = furrr_options(seed = TRUE))
@@ -1340,7 +1232,7 @@ main <- function() {
      
      # Recalculate summary statistics for the background strain
      message("Calculating summary statistics for background strain")
-      ss_bg <- calculate_summary_stats(df_bg, summary_vars, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
+      ss_bg <- calculate_summary_stats(df_bg, summary_vars, group_vars = c("OrfRep", "conc_num", "conc_num_factor", "conc_num_factor_factor"))
      summary_stats_bg <- ss_bg$summary_stats
      write.csv(summary_stats_bg,
        file = file.path(out_dir, paste0("summary_stats_background_strain_", strain, ".csv")),
@@ -1351,7 +1243,7 @@ main <- function() {
      df_reference <- df_na_stats %>% # formerly X2_RF
        filter(OrfRep == strain) %>%
        filter(!is.na(L)) %>%
-        group_by(conc_num, conc_num_factor) %>%
+        group_by(conc_num, conc_num_factor, conc_num_factor_factor) %>%
        mutate(
          max_l_theoretical = max(max_L, na.rm = TRUE),
          L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L),
@@ -1364,7 +1256,7 @@ main <- function() {
        filter(OrfRep != strain) %>%
        filter(!is.na(L)) %>%
        mutate(SM = 0) %>%
-        group_by(conc_num, conc_num_factor) %>%
+        group_by(conc_num, conc_num_factor, conc_num_factor_factor) %>%
        mutate(
          max_l_theoretical = max(max_L, na.rm = TRUE),
          L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L),
@@ -1376,7 +1268,7 @@ main <- function() {
      df_reference_stats <- calculate_summary_stats(
        df = df_reference,
        variables = interaction_vars,
-        group_vars = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor")
+        group_vars = c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor", "conc_num_factor_factor")
        )$df_with_stats
      reference_results <- calculate_interaction_scores(df_reference_stats, max_conc, bg_stats, group_vars = c("OrfRep", "Gene", "num"))
      zscore_calculations_reference <- reference_results$calculations
@@ -1387,9 +1279,9 @@ main <- function() {
      df_deletion_stats <- calculate_summary_stats(
        df = df_deletion,
        variables = interaction_vars,
-        group_vars = c("OrfRep", "Gene", "conc_num", "conc_num_factor")
+        group_vars = c("OrfRep", "Gene", "conc_num", "conc_num_factor", "conc_num_factor_factor")
        )$df_with_stats
-      deletion_results <- calculate_interaction_scores(df_deletion_stats, max_conc, bg_stats, group_vars = c("OrfRep"))
+      deletion_results <- calculate_interaction_scores(df_deletion_stats, max_conc, bg_stats, group_vars = c("OrfRep", "Gene"))
      zscore_calculations <- deletion_results$calculations
      zscore_interactions <- deletion_results$interactions
      zscore_interactions_joined <- deletion_results$full_data