From 8d398464e843f6df862b52461757ea5c933164c2 Mon Sep 17 00:00:00 2001
From: Bryan Roessler <bryanroessler@gmail.com>
Date: Tue, 1 Oct 2024 12:59:00 -0400
Subject: [PATCH] Fix rank columns names

---
 .../apps/r/calculate_interaction_zscores.R    | 63 ++++++++++---------
 1 file changed, 32 insertions(+), 31 deletions(-)

diff --git a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R
index 6990ac3f..1f811940 100644
--- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R
+++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R
@@ -2,6 +2,7 @@ suppressMessages({
   library("ggplot2")
   library("plotly")
   library("htmlwidgets")
+  library("htmltools")
   library("dplyr")
   library("rlang")
   library("ggthemes")
@@ -87,7 +88,10 @@ args <- parse_arguments()
 # dir.create(file.path(args$out_dir, "zscores"), showWarnings = FALSE)
 # dir.create(file.path(args$out_dir, "zscores", "qc"), showWarnings = FALSE)
 
-theme_publication <- function(base_size = 14, base_family = "sans", legend_position = "bottom") {
+theme_publication <- function(base_size = 14, base_family = "sans", legend_position = NULL) {
+  # Ensure that legend_position has a valid value or default to "none"
+  legend_position <- if (is.null(legend_position) || length(legend_position) == 0) "none" else legend_position
+  
   theme_foundation <- ggthemes::theme_foundation(base_size = base_size, base_family = base_family)
   
   theme_foundation %+replace%
@@ -99,22 +103,24 @@ theme_publication <- function(base_size = 14, base_family = "sans", legend_posit
       panel.border = element_blank(),
       axis.title = element_text(face = "bold", size = rel(1.4)),
       axis.title.y = element_text(angle = 90, vjust = 2),
-      # axis.title.x = element_text(vjust = -0.2), # TODO this causes errors
       axis.text = element_text(size = rel(1.2)),
       axis.line = element_line(colour = "black"),
-      # axis.ticks = element_line(),
       panel.grid.major = element_line(colour = "#f0f0f0"),
       panel.grid.minor = element_blank(),
       legend.key = element_rect(colour = NA),
       legend.position = legend_position,
-      legend.direction = ifelse(legend_position == "right", "vertical", "horizontal"),
-      # legend.key.size = unit(0.5, "cm"),
+      legend.direction =
+        if (legend_position == "right") {
+          "vertical"
+        } else if (legend_position == "bottom") {
+          "horizontal"
+        } else {
+          NULL  # No legend direction if position is "none" or other values
+        },
       legend.spacing = unit(0, "cm"),
       legend.title = element_text(face = "italic", size = rel(1.3)),
       legend.text = element_text(size = rel(1.2)),
       plot.margin = unit(c(10, 5, 5, 5), "mm")
-      # strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
-      # strip.text = element_text(face = "bold")
     )
 }
 
@@ -377,7 +383,7 @@ calculate_interaction_scores <- function(df, max_conc, bg_stats,
   df_with_calculations_clean <- df_with_calculations %>%
     select(-any_of(
       setdiff(intersect(names(df_with_calculations), names(interactions)),
-      c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor"))))
+      c("OrfRep", "Gene", "num", "conc_num", "conc_num_factor", "conc_num_factor_factor"))))
 
   # Join with interactions to create the full dataset
   full_data <- left_join(df_with_calculations_clean, interactions,
@@ -430,7 +436,6 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs) {
       }
     )
 
-    # Apply theme_publication with legend_position
     plot <- ggplot(df, aes_mapping) + theme_publication(legend_position = config$legend_position)
 
     # Apply appropriate plot function
@@ -438,7 +443,7 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs) {
       "scatter" = generate_scatter_plot(plot, config),
       "box" = generate_box_plot(plot, config),
       "density" = plot + geom_density(),
-      "bar" = plot + geom_bar(stat = "count"), # count occurrences
+      "bar" = plot + geom_bar(),
       plot  # default (unused)
     )
 
@@ -455,13 +460,15 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs) {
       plotly_plot <- suppressWarnings(plotly::ggplotly(plot))
     }
 
+    if (!is.null(plotly_plot[["frames"]])) {
+      plotly_plot[["frames"]] <- NULL
+    }
+
     # Adjust legend position in plotly
     if (!is.null(config$legend_position) && config$legend_position == "bottom") {
       plotly_plot <- plotly_plot %>% layout(legend = list(orientation = "h"))
-      plotly_plot <- plotly_plot + theme(legend.direction = NULL)
     }
 
-    # Add static and interactive plots to lists
     static_plots[[i]] <- plot
     plotly_plots[[i]] <- plotly_plot
   }
@@ -483,7 +490,7 @@ generate_and_save_plots <- function(out_dir, filename, plot_configs) {
 
   dev.off()
 
-  # Save individual interactive HTML plots without subplot
+  # Save individual interactive HTML plots
   for (i in seq_along(plotly_plots)) {
     html_file <- file.path(out_dir, paste0(filename, "_plot_", i, ".html"))
     message("Saving HTML plot ", i, ": ", html_file)
@@ -799,24 +806,20 @@ generate_rank_plot_configs <- function(df, variables, is_lm = FALSE, adjust = FA
   sd_bands <- c(1, 2, 3)
   avg_zscore_cols <- paste0("Avg_Zscore_", variables)
   z_lm_cols <- paste0("Z_lm_", variables)
-  rank_avg_zscore_cols <- paste0("Rank_", variables)
-  rank_z_lm_cols <- paste0("Rank_lm_", variables)
 
   configs <- list()
 
   # Adjust values if necessary
   if (adjust) {
     df <- df %>%
-      mutate(
-        across(all_of(avg_zscore_cols), ~ifelse(is.na(.), 0.001, .)),
-        across(all_of(z_lm_cols), ~ifelse(is.na(.), 0.001, .))
-      )
+      mutate(across(all_of(avg_zscore_cols), ~ ifelse(is.na(.), 0.001, .))) %>%
+      mutate(across(all_of(z_lm_cols), ~ ifelse(is.na(.), 0.001, .)))
   }
 
   # Calculate rank columns for Avg_Zscore and Z_lm columns
   df_ranked <- df %>%
-    mutate(across(all_of(avg_zscore_cols), ~rank(., na.last = "keep"), .names = paste0("Rank_", avg_zscore_cols))) %>%
-    mutate(across(all_of(z_lm_cols), ~rank(., na.last = "keep"), .names = paste0("Rank_lm_", z_lm_cols)))
+    mutate(across(all_of(avg_zscore_cols), rank, .names = "Rank_{col}")) %>%
+    mutate(across(all_of(z_lm_cols), rank, .names = "Rank_lm_{col}"))
 
   # Generate plots for SD-based L and K variables
   for (variable in c("L", "K")) {
@@ -1004,8 +1007,6 @@ main <- function() {
     
     # Save some constants
     max_conc <- max(df$conc_num_factor)
-    l_half_median <- (median(df_above_tolerance$L, na.rm = TRUE)) / 2
-    k_half_median <- (median(df_above_tolerance$K, na.rm = TRUE)) / 2
 
     message("Calculating summary statistics before quality control")
     df_stats <- calculate_summary_stats(
@@ -1137,8 +1138,8 @@ main <- function() {
           position = "jitter",
           annotations = list(
             list(
-              x = l_half_median,
-              y = k_half_median,
+              x = median(df_above_tolerance$L, na.rm = TRUE) / 2,
+              y = median(df_above_tolerance$K, na.rm = TRUE) / 2,
               label = paste("# strains above Delta Background tolerance =", nrow(df_above_tolerance))
             )
           ),
@@ -1187,12 +1188,12 @@ main <- function() {
           tooltip_vars = c("OrfRep", "Gene", "delta_bg"),
           annotations = list(
             list(
-              x = mean(df_na_l_outside_2sd_k_stats$L, na.rm = TRUE),
-              y = mean(df_na_l_outside_2sd_k_stats$K, na.rm = TRUE),
+              x = median(df_na_l_outside_2sd_k_stats$L, na.rm = TRUE) / 2,
+              y = median(df_na_l_outside_2sd_k_stats$K, na.rm = TRUE) / 2,
               label = paste("Total strains:", nrow(df_na_l_outside_2sd_k_stats))
             )
           ),
-          error_bar = FALSE,  # No error bars for this plot
+          error_bar = FALSE,
           legend_position = "right"
         )
       )
@@ -1211,12 +1212,12 @@ main <- function() {
           tooltip_vars = c("OrfRep", "Gene", "delta_bg"),
           annotations = list(
             list(
-              x = mean(df_na_l_outside_2sd_k_stats$delta_bg, na.rm = TRUE),
-              y = mean(df_na_l_outside_2sd_k_stats$K, na.rm = TRUE),
+              x = median(df_na_l_outside_2sd_k_stats$delta_bg, na.rm = TRUE) / 2,
+              y = median(df_na_l_outside_2sd_k_stats$K, na.rm = TRUE) / 2,
               label = paste("Total strains:", nrow(df_na_l_outside_2sd_k_stats))
             )
           ),
-          error_bar = FALSE,  # No error bars for this plot
+          error_bar = FALSE,
           legend_position = "right"
         )
       )