Convert tidy symbols

2024-10-06 21:56:51 -04:00
parent 214397e6ee
commit 642c74fa62
1 changed files with 136 additions and 129 deletions
--- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R
+++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R
@@ -528,7 +528,7 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
      # Filter NAs
      if (!is.null(config$filter_na) && config$filter_na) {
        df <- df %>%
-          filter(!is.na(.data[[config$y_var]]))
+          filter(!is.na(!!sym(config$y_var)))
      }

      # TODO for now skip all NA plots NA data
@@ -539,17 +539,17 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs, page_width
      }

      # Create initial aes mappings for all plot types
-      aes_mapping <- aes(x = .data[[config$x_var]])  # required
+      aes_mapping <- aes(x = !!sym(config$x_var))  # required
      if (!is.null(config$y_var)) {
-        aes_mapping <- modifyList(aes_mapping, aes(y = .data[[config$y_var]])) # optional for density/bar plots
+        aes_mapping <- modifyList(aes_mapping, aes(y = !!sym(config$y_var))) # optional for density/bar plots
      }
      if (!is.null(config$color_var)) {
-        aes_mapping <- modifyList(aes_mapping, aes(color = .data[[config$color_var]])) # dynamic color_var
+        aes_mapping <- modifyList(aes_mapping, aes(color = !!sym(config$color_var))) # dynamic color_var
      } else if (!is.null(config$color)) {
        aes_mapping <- modifyList(aes_mapping, aes(color = config$color)) # static color
      }
      if (config$plot_type == "bar" && !is.null(config$color_var)) {
-        aes_mapping <- modifyList(aes_mapping, aes(fill = .data[[config$color_var]])) # only fill bar plots
+        aes_mapping <- modifyList(aes_mapping, aes(fill = !!sym(config$color_var))) # only fill bar plots
      }

      # Begin plot generation
@@ -642,12 +642,11 @@ generate_scatter_plot <- function(plot, config) {
  # Add a cyan point for the reference data for correlation plots
  if (!is.null(config$cyan_points) && config$cyan_points) {
    plot <- plot + geom_point(
+      aes(x = !!sym(config$x_var), y = !!sym(config$y_var)),
      data = config$df_reference,
-      mapping = aes(x = .data[[config$x_var]], y = .data[[config$y_var]]),
      color = "cyan",
      shape = 3,
-      size = 0.5,
-      inherit.aes = FALSE
+      size = 0.5
    )
  }

@@ -685,9 +684,9 @@ generate_scatter_plot <- function(plot, config) {
        # Only use color_var if it's present in the dataframe
        plot <- plot + geom_errorbar(
          aes(
-            ymin = .data[[y_mean_col]] - .data[[y_sd_col]],
-            ymax = .data[[y_mean_col]] + .data[[y_sd_col]],
-            color = .data[[config$color_var]]
+            ymin = !!sym(y_mean_col) - !!sym(y_sd_col),
+            ymax = !!sym(y_mean_col) + !!sym(y_sd_col),
+            color = !!sym(config$color_var)
          ),
          linewidth = 0.1
        )
@@ -695,8 +694,8 @@ generate_scatter_plot <- function(plot, config) {
        # If color_var is missing, fall back to a default color or none
        plot <- plot + geom_errorbar(
          aes(
-            ymin = .data[[y_mean_col]] - .data[[y_sd_col]],
-            ymax = .data[[y_mean_col]] + .data[[y_sd_col]]
+            ymin = !!sym(y_mean_col) - !!sym(y_sd_col),
+            ymax = !!sym(y_mean_col) + !!sym(y_sd_col)
          ),
          color = config$error_bar_params$color, # use the provided color or default
          linewidth = ifelse(is.null(config$error_bar_params$linewidth), 0.1, config$error_bar_params$linewidth)
@@ -708,16 +707,14 @@ generate_scatter_plot <- function(plot, config) {
    if (!is.null(config$error_bar_params$mean_point) && config$error_bar_params$mean_point) {
      if (!is.null(config$error_bar_params$color)) {
        plot <- plot + geom_point(
-          mapping = aes(x = .data[[config$x_var]], y = .data[[y_mean_col]]),  # Include both x and y mappings
+          aes(x = !!sym(config$x_var), y = !!sym(y_mean_col)),
          color = config$error_bar_params$color,
-          shape = 16,
-          inherit.aes = FALSE  # Prevent overriding global aesthetics
+          shape = 16
        )
      } else {
        plot <- plot + geom_point(
-          mapping = aes(x = .data[[config$x_var]], y = .data[[y_mean_col]]),  # Include both x and y mappings
-          shape = 16,
-          inherit.aes = FALSE  # Prevent overriding global aesthetics
+          aes(x = !!sym(config$x_var), y = !!sym(y_mean_col)),
+          shape = 16
        )
      }
    }
@@ -726,30 +723,30 @@ generate_scatter_plot <- function(plot, config) {
  # Add linear regression line if specified
  if (!is.null(config$lm_line)) {
    # Extract necessary values
-    x_min <- config$lm_line$x_min
-    x_max <- config$lm_line$x_max
-    intercept <- config$lm_line$intercept
-    slope <- config$lm_line$slope
+    intercept <- config$lm_line$intercept # required
+    slope <- config$lm_line$slope # required
+    xmin <- ifelse(!is.null(config$lm_line$xmin), config$lm_line$xmin, min(as.numeric(config$df[[config$x_var]])))
+    xmax <- ifelse(!is.null(config$lm_line$xmax), config$lm_line$xmax, max(as.numeric(config$df[[config$x_var]])))
    color <- ifelse(!is.null(config$lm_line$color), config$lm_line$color, "blue")
    linewidth <- ifelse(!is.null(config$lm_line$linewidth), config$lm_line$linewidth, 1)
-    y_min <- intercept + slope * x_min
-    y_max <- intercept + slope * x_max
+    ymin <- intercept + slope * xmin
+    ymax <- intercept + slope * xmax
    
    # Ensure y-values are within y-limits (if any)
    if (!is.null(config$ylim_vals)) {
-      y_min_within_limits <- y_min >= config$ylim_vals[1] && y_min <= config$ylim_vals[2]
-      y_max_within_limits <- y_max >= config$ylim_vals[1] && y_max <= config$ylim_vals[2]
+      ymin_within_limits <- ymin >= config$ylim_vals[1] && ymin <= config$ylim_vals[2]
+      ymax_within_limits <- ymax >= config$ylim_vals[1] && ymax <= config$ylim_vals[2]

      # Adjust or skip based on whether the values fall within limits
-      if (y_min_within_limits && y_max_within_limits) {
+      if (ymin_within_limits && ymax_within_limits) {
        plot <- plot + annotate(
          "segment",
-          x = x_min,
-          xend = x_max,
-          y = y_min,
-          yend = y_max,
+          x = xmin,
+          xend = xmax,
+          y = ymin,
+          yend = ymax,
          color = color,
-          linewidth = linewidth
+          linewidth = linewidth,
        )
      } else {
        message("Skipping linear regression line due to y-values outside of limits")
@@ -758,10 +755,10 @@ generate_scatter_plot <- function(plot, config) {
      # If no y-limits are provided, proceed with the annotation
      plot <- plot + annotate(
        "segment",
-        x = x_min,
-        xend = x_max,
-        y = y_min,
-        yend = y_max,
+        x = xmin,
+        xend = xmax,
+        y = ymin,
+        yend = ymax,
        color = color,
        linewidth = linewidth
      )
@@ -837,7 +834,7 @@ generate_scatter_plot <- function(plot, config) {

 generate_boxplot <- function(plot, config) {
  # Convert x_var to a factor within aes mapping
-  plot <- plot + geom_boxplot(aes(x = factor(.data[[config$x_var]])))
+  plot <- plot + geom_boxplot(aes(x = factor(!!sym(config$x_var))))

  # Customize X-axis if specified
  if (!is.null(config$x_breaks) && !is.null(config$x_labels) && !is.null(config$x_label)) {
@@ -871,7 +868,7 @@ generate_plate_analysis_plot_configs <- function(variables, df_before = NULL, df
      df_plot <- if (stage == "before") df_before else df_after

      # Check for non-finite values in the y-variable
-      # df_plot_filtered <- df_plot %>% filter(is.finite(.data[[var]]))
+      # df_plot_filtered <- df_plot %>% filter(is.finite(!!sym(var)))

      # Adjust settings based on plot_type
      plot_config <- list(
@@ -950,7 +947,7 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type)
        )

        for (x_val in unique(df_summary$conc_num_factor_factor)) {
-          current_df <- df_summary %>% filter(.data[[plot_config$x_var]] == x_val)
+          current_df <- df_summary %>% filter(!!sym(plot_config$x_var) == x_val)
          annotations <- append(annotations, list(
            list(x = x_val, y = y_limits[1] + 0.08 * y_span, label = first(current_df$NG, default = 0), size = 4),
            list(x = x_val, y = y_limits[1] + 0.04 * y_span, label = first(current_df$DB, default = 0), size = 4),
@@ -1007,9 +1004,9 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type)

      # Anti-filter to select out-of-bounds rows
      out_of_bounds <- group_data %>%
-        filter(is.na(.data[[y_var_name]]) |
-        .data[[y_var_name]] < y_limits[1] |
-        .data[[y_var_name]] > y_limits[2])
+        filter(is.na(!!sym(y_var_name)) |
+        !!sym(y_var_name) < y_limits[1] |
+        !!sym(y_var_name) > y_limits[2])
      
      if (nrow(out_of_bounds) > 0) {
        message(sprintf("Filtered %d row(s) from '%s' because %s is outside of y-limits: [%f, %f]",
@@ -1019,9 +1016,9 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type)

      # Do the actual filtering
      group_data_filtered <- group_data %>%
-        filter(!is.na(.data[[y_var_name]]) &
-          .data[[y_var_name]] >= y_limits[1] &
-          .data[[y_var_name]] <= y_limits[2])
+        filter(!is.na(!!sym(y_var_name)) &
+          !!sym(y_var_name) >= y_limits[1] &
+          !!sym(y_var_name) <= y_limits[2])

      if (nrow(group_data_filtered) == 0) {
        message("Insufficient data for plot: ", OrfRepTitle, " ", var)
@@ -1039,8 +1036,8 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type)

      lm_intercept_col <- paste0("lm_intercept_", var)
      lm_slope_col <- paste0("lm_slope_", var)
-      lm_intercept_value <- first(group_data_filtered[[lm_intercept_col]], default = 0)
-      lm_slope_value <- first(group_data_filtered[[lm_slope_col]], default = 0)
+      lm_intercept <- first(group_data_filtered[[lm_intercept_col]], default = 0)
+      lm_slope <- first(group_data_filtered[[lm_slope_col]], default = 0)
      
      plot_config <- list(
        df = group_data_filtered,
@@ -1073,12 +1070,10 @@ generate_interaction_plot_configs <- function(df_summary, df_interactions, type)
        x_labels = as.character(unique(group_data_filtered$conc_num)),
        ylim_vals = y_limits,
        lm_line = list(
-          intercept = lm_intercept_value,
-          slope = lm_slope_value,
+          intercept = lm_intercept,
+          slope = lm_slope,
          color = "blue",
-          linewidth = 0.8,
-          x_min = min(as.numeric(group_data_filtered$conc_num_factor_factor)),
-          x_max = max(as.numeric(group_data_filtered$conc_num_factor_factor))
+          linewidth = 0.8
        )
      )
      delta_plot_configs <- append(delta_plot_configs, list(plot_config))
@@ -1194,6 +1189,12 @@ generate_correlation_plot_configs <- function(df, df_reference) {
    list(x = "r", y = "AUC")
  )

+  # Filter both dataframes for missing linear model zscores for plotting
+  df <- df %>%
+    filter(!is.na(Z_lm_L))
+  df_reference <- df_reference %>%
+    filter(!is.na(Z_lm_L))
+
  plot_configs <- list()

  # Iterate over the option to highlight cyan points (TRUE/FALSE)
@@ -1205,11 +1206,14 @@ generate_correlation_plot_configs <- function(df, df_reference) {
      x_var <- paste0("Z_lm_", rel$x)
      y_var <- paste0("Z_lm_", rel$y)

-      # Extract the R-squared, intercept, and slope from the df
-      relationship_name <- paste0(rel$x, "_vs_", rel$y)
-      intercept <- df[[paste0("lm_intercept_", rel$x)]]
-      slope <- df[[paste0("lm_slope_", rel$x)]]
-      r_squared <- df[[paste0("lm_R_squared_", rel$x)]]
+      # Find the max and min of both dataframes for printing linear regression line
+      xmin <- min(c(min(df[[x_var]], na.rm = TRUE), min(df_reference[[x_var]], na.rm = TRUE)), na.rm = TRUE)
+      xmax <- max(c(max(df[[x_var]], na.rm = TRUE), max(df_reference[[x_var]], na.rm = TRUE)), na.rm = TRUE)
+
+      # Extract the R-squared, intercept, and slope from the df (first value)
+      intercept <- df[[paste0("lm_intercept_", rel$x)]][1]
+      slope <- df[[paste0("lm_slope_", rel$x)]][1]
+      r_squared <- df[[paste0("lm_R_squared_", rel$x)]][1]

      # Generate the label for the plot
      plot_label <- paste("Interaction", rel$x, "vs.", rel$y)
@@ -1232,7 +1236,10 @@ generate_correlation_plot_configs <- function(df, df_reference) {
        lm_line = list(
          intercept = intercept,
          slope = slope,
-          color = "tomato3"
+          color = "tomato3",
+          linewidth = 0.8,
+          xmin = xmin,
+          xmax = xmax
        ),
        color = "gray70",
        filter_na = TRUE,
@@ -1489,10 +1496,10 @@ main <- function() {
    )

    # Parallelize background and quality control plot generation
-    furrr::future_map(plot_configs, function(config) {
-      generate_and_save_plots(config$out_dir, config$filename, config$plot_configs,
-        page_width = config$page_width, page_height = config$page_height)
-    }, .options = furrr_options(seed = TRUE))
+    # furrr::future_map(plot_configs, function(config) {
+    #   generate_and_save_plots(config$out_dir, config$filename, config$plot_configs,
+    #     page_width = config$page_width, page_height = config$page_height)
+    # }, .options = furrr_options(seed = TRUE))

    # Loop over background strains
    # TODO currently only tested against one strain, if we want to do multiple strains we'll
@@ -1605,77 +1612,77 @@ main <- function() {
      # deletion_plot_configs <- generate_interaction_plot_configs(df_reference_summary_stats, df_interactions_joined, "deletion")
      # generate_and_save_plots(out_dir, "interaction_plots", deletion_plot_configs, page_width = 16, page_height = 16)

-      message("Writing enhancer/suppressor csv files")
-      interaction_threshold <- 2  # TODO add to study config?
-      enhancer_condition_L <- df_interactions$Avg_Zscore_L >= interaction_threshold
-      suppressor_condition_L <- df_interactions$Avg_Zscore_L <= -interaction_threshold
-      enhancer_condition_K <- df_interactions$Avg_Zscore_K >= interaction_threshold
-      suppressor_condition_K <- df_interactions$Avg_Zscore_K <= -interaction_threshold
-      enhancers_L <- df_interactions[enhancer_condition_L, ]
-      suppressors_L <- df_interactions[suppressor_condition_L, ]
-      enhancers_K <- df_interactions[enhancer_condition_K, ]
-      suppressors_K <- df_interactions[suppressor_condition_K, ]
-      enhancers_and_suppressors_L <- df_interactions[enhancer_condition_L | suppressor_condition_L, ]
-      enhancers_and_suppressors_K <- df_interactions[enhancer_condition_K | suppressor_condition_K, ]
-      write.csv(enhancers_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_L.csv"), row.names = FALSE)
-      write.csv(suppressors_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_L.csv"), row.names = FALSE)
-      write.csv(enhancers_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_K.csv"), row.names = FALSE)
-      write.csv(suppressors_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_K.csv"), row.names = FALSE)
-      write.csv(enhancers_and_suppressors_L,
-        file = file.path(out_dir, "zscore_interactions_deletion_enhancers_and_suppressors_L.csv"), row.names = FALSE)
-      write.csv(enhancers_and_suppressors_K,
-        file = file.path(out_dir, "zscore_interaction_deletion_enhancers_and_suppressors_K.csv"), row.names = FALSE)
+      # message("Writing enhancer/suppressor csv files")
+      # interaction_threshold <- 2  # TODO add to study config?
+      # enhancer_condition_L <- df_interactions$Avg_Zscore_L >= interaction_threshold
+      # suppressor_condition_L <- df_interactions$Avg_Zscore_L <= -interaction_threshold
+      # enhancer_condition_K <- df_interactions$Avg_Zscore_K >= interaction_threshold
+      # suppressor_condition_K <- df_interactions$Avg_Zscore_K <= -interaction_threshold
+      # enhancers_L <- df_interactions[enhancer_condition_L, ]
+      # suppressors_L <- df_interactions[suppressor_condition_L, ]
+      # enhancers_K <- df_interactions[enhancer_condition_K, ]
+      # suppressors_K <- df_interactions[suppressor_condition_K, ]
+      # enhancers_and_suppressors_L <- df_interactions[enhancer_condition_L | suppressor_condition_L, ]
+      # enhancers_and_suppressors_K <- df_interactions[enhancer_condition_K | suppressor_condition_K, ]
+      # write.csv(enhancers_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_L.csv"), row.names = FALSE)
+      # write.csv(suppressors_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_L.csv"), row.names = FALSE)
+      # write.csv(enhancers_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_K.csv"), row.names = FALSE)
+      # write.csv(suppressors_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_K.csv"), row.names = FALSE)
+      # write.csv(enhancers_and_suppressors_L,
+      #   file = file.path(out_dir, "zscore_interactions_deletion_enhancers_and_suppressors_L.csv"), row.names = FALSE)
+      # write.csv(enhancers_and_suppressors_K,
+      #   file = file.path(out_dir, "zscore_interaction_deletion_enhancers_and_suppressors_K.csv"), row.names = FALSE)
      
-      message("Writing linear model enhancer/suppressor csv files")
-      lm_interaction_threshold <- 2 # TODO add to study config?
-      enhancers_lm_L <- df_interactions[df_interactions$Z_lm_L >= lm_interaction_threshold, ]
-      suppressors_lm_L <- df_interactions[df_interactions$Z_lm_L <= -lm_interaction_threshold, ]
-      enhancers_lm_K <- df_interactions[df_interactions$Z_lm_K >= lm_interaction_threshold, ]
-      suppressors_lm_K <- df_interactions[df_interactions$Z_lm_K <= -lm_interaction_threshold, ]
-      write.csv(enhancers_lm_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_L.csv"), row.names = FALSE)
-      write.csv(suppressors_lm_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_L.csv"), row.names = FALSE)
-      write.csv(enhancers_lm_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_K.csv"), row.names = FALSE)
-      write.csv(suppressors_lm_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_K.csv"), row.names = FALSE)
+      # message("Writing linear model enhancer/suppressor csv files")
+      # lm_interaction_threshold <- 2 # TODO add to study config?
+      # enhancers_lm_L <- df_interactions[df_interactions$Z_lm_L >= lm_interaction_threshold, ]
+      # suppressors_lm_L <- df_interactions[df_interactions$Z_lm_L <= -lm_interaction_threshold, ]
+      # enhancers_lm_K <- df_interactions[df_interactions$Z_lm_K >= lm_interaction_threshold, ]
+      # suppressors_lm_K <- df_interactions[df_interactions$Z_lm_K <= -lm_interaction_threshold, ]
+      # write.csv(enhancers_lm_L, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_L.csv"), row.names = FALSE)
+      # write.csv(suppressors_lm_L, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_L.csv"), row.names = FALSE)
+      # write.csv(enhancers_lm_K, file = file.path(out_dir, "zscore_interactions_deletion_enhancers_lm_K.csv"), row.names = FALSE)
+      # write.csv(suppressors_lm_K, file = file.path(out_dir, "zscore_interactions_deletion_suppressors_lm_K.csv"), row.names = FALSE)

-      message("Generating rank plots")
-      rank_plot_configs <- generate_rank_plot_configs(
-        df_interactions,
-        is_lm = FALSE,
-        adjust = TRUE
-      )
-      generate_and_save_plots(out_dir, "rank_plots", rank_plot_configs,
-        page_width = 18, page_height = 12)
+      # message("Generating rank plots")
+      # rank_plot_configs <- generate_rank_plot_configs(
+      #   df_interactions,
+      #   is_lm = FALSE,
+      #   adjust = TRUE
+      # )
+      # generate_and_save_plots(out_dir, "rank_plots", rank_plot_configs,
+      #   page_width = 18, page_height = 12)

-      message("Generating ranked linear model plots")
-      rank_lm_plot_configs <- generate_rank_plot_configs(
-        df_interactions,
-        is_lm = TRUE,
-        adjust = TRUE
-      )
-      generate_and_save_plots(out_dir, "rank_plots_lm", rank_lm_plot_configs,
-        page_width = 18, page_height = 12)
+      # message("Generating ranked linear model plots")
+      # rank_lm_plot_configs <- generate_rank_plot_configs(
+      #   df_interactions,
+      #   is_lm = TRUE,
+      #   adjust = TRUE
+      # )
+      # generate_and_save_plots(out_dir, "rank_plots_lm", rank_lm_plot_configs,
+      #   page_width = 18, page_height = 12)

-      message("Generating overlapped ranked plots")
-      rank_plot_filtered_configs <- generate_rank_plot_configs(
-        df_interactions,
-        is_lm = FALSE,
-        adjust = FALSE,
-        filter_na = TRUE,
-        overlap_color = TRUE
-      )
-      generate_and_save_plots(out_dir, "rank_plots_na_rm", rank_plot_filtered_configs,
-        page_width = 18, page_height = 12)
+      # message("Generating overlapped ranked plots")
+      # rank_plot_filtered_configs <- generate_rank_plot_configs(
+      #   df_interactions,
+      #   is_lm = FALSE,
+      #   adjust = FALSE,
+      #   filter_na = TRUE,
+      #   overlap_color = TRUE
+      # )
+      # generate_and_save_plots(out_dir, "rank_plots_na_rm", rank_plot_filtered_configs,
+      #   page_width = 18, page_height = 12)

-      message("Generating overlapped ranked linear model plots")
-      rank_plot_lm_filtered_configs <- generate_rank_plot_configs(
-        df_interactions,
-        is_lm = TRUE,
-        adjust = FALSE,
-        filter_na = TRUE,
-        overlap_color = TRUE
-      )
-      generate_and_save_plots(out_dir, "rank_plots_lm_na_rm", rank_plot_lm_filtered_configs,
-        page_width = 18, page_height = 12)
+      # message("Generating overlapped ranked linear model plots")
+      # rank_plot_lm_filtered_configs <- generate_rank_plot_configs(
+      #   df_interactions,
+      #   is_lm = TRUE,
+      #   adjust = FALSE,
+      #   filter_na = TRUE,
+      #   overlap_color = TRUE
+      # )
+      # generate_and_save_plots(out_dir, "rank_plots_lm_na_rm", rank_plot_lm_filtered_configs,
+      #   page_width = 18, page_height = 12)

      message("Generating correlation curve parameter pair plots")
      correlation_plot_configs <- generate_correlation_plot_configs(