Relocate project

2024-09-10 13:23:18 -04:00
parent 3833f75184
commit a8cf3c5325
6220 changed files with 4859 additions and 69 deletions
--- a/qhtcp-workflow/apps/r/170503_DAmPs_Only.txt
+++ b/qhtcp-workflow/apps/r/170503_DAmPs_Only.txt
--- a/qhtcp-workflow/apps/r/All_SGD_GOTerms_for_QHTCPtk.csv
+++ b/qhtcp-workflow/apps/r/All_SGD_GOTerms_for_QHTCPtk.csv
--- a/qhtcp-workflow/apps/r/CompileGTF.R
+++ b/qhtcp-workflow/apps/r/CompileGTF.R
@@ -0,0 +1,54 @@
+#!/usr/bin/env Rscript
+
+# Load the openxlsx package
+library(openxlsx)
+
+args <- commandArgs(TRUE)
+
+outDir <- args[1]
+
+# Function to combine CSV and TXT files into a workbook with named sheets
+combineFilesToWorkbook <- function(file_list, output_file) {
+  # Create a new workbook
+  wb <- createWorkbook()
+  
+  # Loop through the file list
+  for (i in seq_along(file_list)) {
+    file <- file_list[i]
+    
+    # Extract the sheet name from the file name based on the extension
+    ext <- tools::file_ext(file)
+    sheet_name <- tools::file_path_sans_ext(basename(file))
+    
+    # Read the data based on the file extension
+    if (ext %in% c("csv", "txt")) {
+      if (ext == "csv") {
+        data <- read.csv(file)
+      } else if (ext == "txt") {
+        data <- readLines(file)
+      }
+      
+      # Create a new sheet in the workbook with the extracted sheet name
+      addWorksheet(wb, sheetName = sheet_name)
+      
+      # Write the data to the sheet
+      writeData(wb, sheet = sheet_name, x = data)
+    }
+  }
+  
+  # Save the workbook as an Excel file
+  saveWorkbook(wb, output_file)
+}
+
+Component <- file.path(outDir, "Component", "ComponentResults.txt")
+Function <- file.path(outDir, "Function", "FunctionResults.txt")
+Process <- file.path(outDir, "Process", "ProcessResults.txt")
+
+# Specify the list of input files (both CSV and TXT)
+file_list <- c(Component, Process, Function)
+
+# Specify the output file name
+output_file <- file.path(outDir, "GTFCombined.xlsx")
+
+# Call the function to combine the files into a workbook with named sheets
+combineFilesToWorkbook(file_list, output_file)
--- a/qhtcp-workflow/apps/r/PairwiseLK.R
+++ b/qhtcp-workflow/apps/r/PairwiseLK.R
@@ -0,0 +1,895 @@
+# This R script performs GTA L and K Pairwise Compares for user specified pairs of Experiments
+
+library("ggplot2")
+library("plotly")
+library("htmlwidgets")
+library("extrafont")
+library("grid")
+library("ggthemes")
+
+args <- commandArgs(TRUE)
+exp1_name <- args[1]
+exp1_file <- args[2]
+exp2_name <- args[3]
+exp2_file <- args[4]
+output_dir <- args[5]
+
+pairDirL <- file.path(output_dir, paste0("PairwiseCompareL_", exp1_name, "-", exp2_name))
+pairDirK <- file.path(output_dir, paste0("PairwiseCompareK_", exp1_name, "-", exp2_name))
+
+# Pairwise L
+# outputPlotly <- "../GTAresults/PairwiseCompareL/" #"/GTAresults/PairwiseCompareL/"
+
+# Theme elements for plots
+theme_Publication <- function(base_size = 14, base_family = "sans") {
+  (theme_foundation(base_size = base_size, base_family = base_family) +
+    theme(
+      plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
+      text = element_text(),
+      panel.background = element_rect(colour = NA),
+      plot.background = element_rect(colour = NA),
+      panel.border = element_rect(colour = NA),
+      axis.title = element_text(face = "bold", size = rel(1)),
+      axis.title.y = element_text(angle = 90, vjust = 2),
+      axis.title.x = element_text(vjust = -0.2),
+      axis.text = element_text(),
+      axis.line = element_line(colour = "black"),
+      axis.ticks = element_line(),
+      panel.grid.major = element_line(colour = "#f0f0f0"),
+      panel.grid.minor = element_blank(),
+      legend.key = element_rect(colour = NA),
+      legend.position = "bottom",
+      legend.direction = "horizontal",
+      legend.key.size = unit(0.2, "cm"),
+      legend.spacing = unit(0, "cm"),
+      legend.title = element_text(face = "italic"),
+      plot.margin = unit(c(10, 5, 5, 5), "mm"),
+      strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
+      strip.text = element_text(face = "bold")
+    )
+  )
+}
+
+theme_Publication_legend_right <- function(base_size = 14, base_family = "sans") {
+  (theme_foundation(base_size = base_size, base_family = base_family) +
+    theme(
+      plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
+      text = element_text(),
+      panel.background = element_rect(colour = NA),
+      plot.background = element_rect(colour = NA),
+      panel.border = element_rect(colour = NA),
+      axis.title = element_text(face = "bold", size = rel(1)),
+      axis.title.y = element_text(angle = 90, vjust = 2),
+      axis.title.x = element_text(vjust = -0.2),
+      axis.text = element_text(),
+      axis.line = element_line(colour = "black"),
+      axis.ticks = element_line(),
+      panel.grid.major = element_line(colour = "#f0f0f0"),
+      panel.grid.minor = element_blank(),
+      legend.key = element_rect(colour = NA),
+      legend.position = "right",
+      legend.direction = "vertical",
+      legend.key.size = unit(0.5, "cm"),
+      legend.spacing = unit(0, "cm"),
+      legend.title = element_text(face = "italic"),
+      plot.margin = unit(c(10, 5, 5, 5), "mm"),
+      strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
+      strip.text = element_text(face = "bold")
+    )
+  )
+}
+
+scale_fill_Publication <- function(...) {
+  discrete_scale("fill", "Publication", manual_pal(
+    values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
+    "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...)
+}
+
+scale_colour_Publication <- function(...) {
+  discrete_scale("colour", "Publication", manual_pal(
+    values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
+    "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...)
+}
+
+X1 <- read.csv(file = exp1_file, stringsAsFactors = FALSE, header = TRUE)
+X2 <- read.csv(file = exp2_file, stringsAsFactors = FALSE, header = TRUE)
+
+X <- merge(X1, X2, by = "Term_Avg", all = TRUE, suffixes = c("_X1", "_X2"))
+gg <- ggplot(data = X, aes(
+  x = Z_lm_L_Avg_X1,
+  y = Z_lm_L_Avg_X2,
+  color = Ontology_Avg_X1,
+  Term = Term_Avg,
+  Genes = Genes_Avg_X1,
+  NumGenes = NumGenes_Avg_X1,
+  AllPossibleGenes = AllPossibleGenes_Avg_X1,
+  SD_1 = Z_lm_L_SD_X1,
+  SD_2 = Z_lm_L_SD_X2
+)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
+  geom_rect(
+    aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2),
+    color = "grey20",
+    size = 0.25,
+    alpha = 0.1,
+    inherit.aes = FALSE,
+    fill = NA
+  ) +
+  geom_point(shape = 3) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, " vs. ", exp2_name)) +
+  theme_Publication_legend_right()
+
+pdf(
+  file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOntology.pdf")),
+  width = 12,
+  height = 8
+)
+
+gg
+dev.off()
+pgg <- ggplotly(gg)
+fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOntology.html"))
+htmlwidgets::saveWidget(pgg, fname)
+
+# ID aggravators and alleviators, regardless of whether they meet 2SD threshold
+X1_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 < 2), ]
+X1_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 > -2), ]
+X2_Specific_Aggravators <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 < 2), ]
+X2_Specific_Alleviators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 > -2), ]
+Overlap_Aggravators <- X[which(X$Z_lm_L_Avg_X1 >= 2 & X$Z_lm_L_Avg_X2 >= 2), ]
+Overlap_Alleviators <- X[which(X$Z_lm_L_Avg_X1 <= -2 & X$Z_lm_L_Avg_X2 <= -2), ]
+X2_Specific_Aggravators_X1_Alleviatiors <- X[which(X$Z_lm_L_Avg_X2 >= 2 & X$Z_lm_L_Avg_X1 <= -2), ]
+X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm_L_Avg_X1 >= 2), ]
+X$Overlap_Avg <- NA
+
+try(X[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <-
+  paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"))
+try(X[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <-
+  paste(exp1_name, "Specific_Deletion_Suppresors", sep = "_"))
+try(X[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <-
+  paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"))
+try(X[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <-
+  paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"))
+try(X[X$Term_Avg %in% Overlap_Aggravators$Term_Avg, ]$Overlap_Avg <- "Overlapping_Deletion_Enhancers")
+try(X[X$Term_Avg %in% Overlap_Alleviators$Term_Avg, ]$Overlap_Avg <- "Overlapping_Deletion_Suppressors")
+try(X[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg, ]$Overlap_Avg <-
+  paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"))
+try(X[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg, ]$Overlap_Avg <-
+  paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_"))
+
+gg <- ggplot(data = X, aes(
+  x = Z_lm_L_Avg_X1,
+  y = Z_lm_L_Avg_X2,
+  color = Overlap_Avg,
+  Term = Term_Avg,
+  Genes = Genes_Avg_X1,
+  NumGenes = NumGenes_Avg_X1,
+  AllPossibleGenes = AllPossibleGenes_Avg_X1,
+  SD_1 = Z_lm_L_SD_X1,
+  SD_2 = Z_lm_L_SD_X2
+)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
+  geom_rect(
+    aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2),
+    color = "grey20",
+    size = 0.25,
+    alpha = 0.1,
+    inherit.aes = FALSE,
+    fill = NA
+  ) +
+  geom_point(shape = 3) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, " vs. ", exp2_name)) +
+  theme_Publication_legend_right()
+
+pdf(
+  file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap.pdf")),
+  width = 12,
+  height = 8
+)
+
+gg
+dev.off()
+pgg <- ggplotly(gg)
+fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap.html"))
+htmlwidgets::saveWidget(pgg, fname)
+
+x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2, ]
+
+#3
+gg <- ggplot(data = x_rem2_gene, aes(
+  x = Z_lm_L_Avg_X1,
+  y = Z_lm_L_Avg_X2,
+  color = Overlap_Avg,
+  Term = Term_Avg,
+  Genes = Genes_Avg_X1,
+  NumGenes = NumGenes_Avg_X1,
+  AllPossibleGenes = AllPossibleGenes_Avg_X1,
+  SD_1 = Z_lm_L_SD_X1,
+  SD_2 = Z_lm_L_SD_X2
+)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
+  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
+  geom_point(shape = 3) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
+  theme_Publication_legend_right()
+
+pdf(
+  file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_above2genes.pdf")),
+  width = 12,
+  height = 8
+)
+
+gg
+dev.off()
+pgg <- ggplotly(gg)
+#pgg
+fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap_above2genes.html"))
+htmlwidgets::saveWidget(pgg, fname)
+
+#4
+X_overlap_nothresold <- X[!(is.na(X$Overlap_Avg)), ]
+gg <- ggplot(data = X_overlap_nothresold, aes(
+  x = Z_lm_L_Avg_X1,
+  y = Z_lm_L_Avg_X2,
+  color = Overlap_Avg,
+  Term = Term_Avg,
+  Genes = Genes_Avg_X1,
+  NumGenes = NumGenes_Avg_X1,
+  AllPossibleGenes = AllPossibleGenes_Avg_X1,
+  SD_1 = Z_lm_L_SD_X1,
+  SD_2 = Z_lm_L_SD_X2
+)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
+  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
+  geom_point(shape = 3) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
+  theme_Publication_legend_right()
+
+pdf(
+  file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_Above2SD_ByOverlap.pdf")),
+  width = 12,
+  height = 8
+)
+
+gg
+dev.off()
+pgg <- ggplotly(gg)
+#pgg
+fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_Above2SD_ByOverlap.html"))
+htmlwidgets::saveWidget(pgg, fname)
+
+# Only output GTA terms where average score is still above 2 after subtracting the SD
+# Z1 will ID aggravators, Z2 alleviators
+Z1 <- X
+Z1$L_Subtract_SD_X1 <- Z1$Z_lm_L_Avg_X1 - Z1$Z_lm_L_SD_X1
+Z1$L_Subtract_SD_X2 <- Z1$Z_lm_L_Avg_X2 - Z1$Z_lm_L_SD_X2
+Z2 <- X
+Z2$L_Subtract_SD_X1 <- Z1$Z_lm_L_Avg_X1 + Z1$Z_lm_L_SD_X1
+Z2$L_Subtract_SD_X2 <- Z1$Z_lm_L_Avg_X2 + Z1$Z_lm_L_SD_X2
+X1_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 < 2), ]
+X1_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X1 <= -2 & Z2$L_Subtract_SD_X2 > -2), ]
+X2_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z1$L_Subtract_SD_X1 < 2), ]
+X2_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 > -2), ]
+Overlap_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 >= 2), ]
+Overlap_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 <= -2), ]
+X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z2$L_Subtract_SD_X1 <= -2), ]
+X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2), ]
+X$Overlap <- NA
+
+try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg, ]$Overlap <- paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"))
+try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg, ]$Overlap <- paste(exp1_name, "Specific_Deletion_Suppresors", sep = "_"))
+try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg, ]$Overlap <- paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"))
+try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg, ]$Overlap <- paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"))
+try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg, ]$Overlap <- "Overlapping_Deletion_Enhancers")
+try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg, ]$Overlap <- "Overlapping_Deletion_Suppressors")
+try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg, ]$Overlap <-
+  paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"))
+try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg, ]$Overlap <-
+  paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_"))
+
+#5
+X_abovethreshold <- X[!(is.na(X$Overlap)), ]
+
+gg <- ggplot(data = X_abovethreshold, aes(
+  x = Z_lm_L_Avg_X1,
+  y = Z_lm_L_Avg_X2,
+  color = Overlap,
+  Term = Term_Avg,
+  Genes = Genes_Avg_X1,
+  NumGenes = NumGenes_Avg_X1,
+  AllPossibleGenes = AllPossibleGenes_Avg_X1,
+  SD_1 = Z_lm_L_SD_X1,
+  SD_2 = Z_lm_L_SD_X2
+)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
+  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
+  geom_point(shape = 3) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, " vs. ", exp2_name)) +
+  theme_Publication_legend_right()
+
+pdf(
+  file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold.pdf")),
+  width = 12,
+  height = 8
+)
+
+gg
+dev.off()
+pgg <- ggplotly(gg)
+#pgg
+fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold.html"))
+htmlwidgets::saveWidget(pgg, fname)
+
+#6
+gg <- ggplot(data = X_abovethreshold, aes(
+  x = Z_lm_L_Avg_X1,
+  y = Z_lm_L_Avg_X2,
+  color = Overlap,
+  Term = Term_Avg,
+  Genes = Genes_Avg_X1,
+  NumGenes = NumGenes_Avg_X1,
+  AllPossibleGenes = AllPossibleGenes_Avg_X1,
+  SD_1 = Z_lm_L_SD_X1,
+  SD_2 = Z_lm_L_SD_X2
+)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
+  geom_text(aes(label = Term_Avg), nudge_y = 0.25, size = 2) +
+  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
+  geom_point(shape = 3, size = 3) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
+  theme_Publication_legend_right()
+
+pdf(
+  file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_names.pdf")),
+  width = 20,
+  height = 20
+)
+
+gg
+dev.off()
+pgg <- ggplotly(gg)
+#pgg
+fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_names.html"))
+htmlwidgets::saveWidget(pgg, fname)
+
+X_abovethreshold$X1_Rank <- NA
+X_abovethreshold$X1_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X1, ties.method = "random")
+X_abovethreshold$X2_Rank <- NA
+X_abovethreshold$X2_Rank <- rank(-X_abovethreshold$Z_lm_L_Avg_X2, ties.method = "random")
+
+#7
+gg <- ggplot(data = X_abovethreshold, aes(
+  x = Z_lm_L_Avg_X1,
+  y = Z_lm_L_Avg_X2,
+  color = Overlap,
+  Term = Term_Avg,
+  Genes = Genes_Avg_X1,
+  NumGenes = NumGenes_Avg_X1,
+  AllPossibleGenes = AllPossibleGenes_Avg_X1,
+  SD_1 = Z_lm_L_SD_X1,
+  SD_2 = Z_lm_L_SD_X2
+)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
+  geom_text(aes(label = X1_Rank), nudge_y = 0.25, size = 4) +
+  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
+  geom_point(shape = 3, size = 3) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
+  theme_Publication_legend_right()
+
+pdf(
+  file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX1.pdf")),
+  width = 15,
+  height = 15
+)
+
+gg
+dev.off()
+pgg <- ggplotly(gg)
+#pgg
+
+fname <-
+  file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX1.html"))
+htmlwidgets::saveWidget(pgg, fname)
+
+#8
+gg <- ggplot(data = X_abovethreshold, aes(
+  x = Z_lm_L_Avg_X1,
+  y = Z_lm_L_Avg_X2,
+  color = Overlap,
+  Term = Term_Avg,
+  Genes = Genes_Avg_X1,
+  NumGenes = NumGenes_Avg_X1,
+  AllPossibleGenes = AllPossibleGenes_Avg_X1,
+  SD_1 = Z_lm_L_SD_X1,
+  SD_2 = Z_lm_L_SD_X2
+)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
+  geom_text(aes(label = X2_Rank), nudge_y = 0.25, size = 4) +
+  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
+  geom_point(shape = 3, size = 3) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
+  theme_Publication_legend_right()
+
+pdf(
+  file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX2.pdf")),
+  width = 15,
+  height = 15
+)
+
+gg
+dev.off()
+pgg <- ggplotly(gg)
+#pgg
+fname <-
+  file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX2.html"))
+htmlwidgets::saveWidget(pgg, fname)
+
+write.csv(
+  x = X,
+  file.path(pairDirL, paste0("All_GTA_Avg_Scores_", exp1_name, "_vs_", exp2_name, ".csv")),
+  row.names = FALSE
+)
+
+write.csv(
+  x = X_abovethreshold,
+  file = file.path(pairDirL, paste0("AboveThreshold_GTA_Avg_Scores_", exp1_name, "_vs_", exp2_name, ".csv")),
+  row.names = FALSE
+)
+
+# Begin Pairwsise K
+# Theme elements for plots
+theme_Publication <- function(base_size = 14, base_family = "sans") {
+  (theme_foundation(base_size = base_size, base_family = base_family) +
+    theme(
+      plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
+      text = element_text(),
+      panel.background = element_rect(colour = NA),
+      plot.background = element_rect(colour = NA),
+      panel.border = element_rect(colour = NA),
+      axis.title = element_text(face = "bold", size = rel(1)),
+      axis.title.y = element_text(angle = 90, vjust = 2),
+      axis.title.x = element_text(vjust = -0.2),
+      axis.text = element_text(),
+      axis.line = element_line(colour = "black"),
+      axis.ticks = element_line(),
+      panel.grid.major = element_line(colour = "#f0f0f0"),
+      panel.grid.minor = element_blank(),
+      legend.key = element_rect(colour = NA),
+      legend.position = "bottom",
+      legend.direction = "horizontal",
+      legend.key.size = unit(0.2, "cm"),
+      legend.spacing = unit(0, "cm"),
+      legend.title = element_text(face = "italic"),
+      plot.margin = unit(c(10, 5, 5, 5), "mm"),
+      strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
+      strip.text = element_text(face = "bold")
+    )
+  )
+}
+
+scale_fill_Publication <- function(...) {
+  library("scales")
+  discrete_scale(
+    "fill",
+    "Publication",
+    manual_pal(values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")),
+    ...
+  )
+}
+
+scale_colour_Publication <- function(...) {
+  discrete_scale(
+    "colour",
+    "Publication",
+    manual_pal(values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")),
+    ...
+  )
+}
+
+theme_Publication_legend_right <- function(base_size = 14, base_family = "sans") {
+  (theme_foundation(base_size = base_size, base_family = base_family) +
+    theme(
+      plot.title = element_text(face = "bold",
+      size = rel(1.2), hjust = 0.5),
+      text = element_text(),
+      panel.background = element_rect(colour = NA),
+      plot.background = element_rect(colour = NA),
+      panel.border = element_rect(colour = NA),
+      axis.title = element_text(face = "bold", size = rel(1)),
+      axis.title.y = element_text(angle = 90, vjust = 2),
+      axis.title.x = element_text(vjust = -0.2),
+      axis.text = element_text(),
+      axis.line = element_line(colour = "black"),
+      axis.ticks = element_line(),
+      panel.grid.major = element_line(colour = "#f0f0f0"),
+      panel.grid.minor = element_blank(),
+      legend.key = element_rect(colour = NA),
+      legend.position = "right",
+      legend.direction = "vertical",
+      legend.key.size = unit(0.5, "cm"),
+      legend.spacing = unit(0, "cm"),
+      legend.title = element_text(face = "italic"),
+      plot.margin = unit(c(10, 5, 5, 5), "mm"),
+      strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
+      strip.text = element_text(face = "bold")
+    )
+  )
+}
+
+scale_fill_Publication <- function(...) {
+  discrete_scale(
+    "fill",
+    "Publication",
+    manual_pal(values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")),
+    ...
+  )
+}
+
+scale_colour_Publication <- function(...) {
+  discrete_scale(
+    "colour",
+    "Publication",
+    manual_pal(values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")),
+    ...
+  )
+}
+
+X1 <- read.csv(file = exp1_file, stringsAsFactors = FALSE, header = TRUE)
+X2 <- read.csv(file = exp2_file, stringsAsFactors = FALSE, header = TRUE)
+
+#1
+X <- merge(X1, X2, by = "Term_Avg", all = TRUE, suffixes = c("_X1", "_X2"))
+
+gg <- ggplot(data = X, aes(
+  x = Z_lm_K_Avg_X1,
+  y = Z_lm_K_Avg_X2,
+  color = Ontology_Avg_X1,
+  Term = Term_Avg,
+  Genes = Genes_Avg_X1,
+  NumGenes = NumGenes_Avg_X1,
+  AllPossibleGenes = AllPossibleGenes_Avg_X1,
+  SD_1 = Z_lm_K_SD_X1,
+  SD_2 = Z_lm_K_SD_X2
+)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
+  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
+  geom_point(shape = 3) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
+  theme_Publication_legend_right()
+
+pdf(
+  file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOntology.pdf")),
+  width = 12,
+  height = 8
+)
+
+gg
+dev.off()
+pgg <- ggplotly(gg)
+#pgg
+fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOntology.html"))
+htmlwidgets::saveWidget(pgg, fname)
+
+#2
+# ID aggravators and alleviators, regardless of whether they meet 2SD threshold
+X1_Specific_Aggravators <- X[which(X$Z_lm_K_Avg_X1 >= 2 & X$Z_lm_K_Avg_X2 < 2), ]
+X1_Specific_Alleviators <- X[which(X$Z_lm_K_Avg_X1 <= -2 & X$Z_lm_K_Avg_X2 > -2), ]
+X2_Specific_Aggravators <- X[which(X$Z_lm_K_Avg_X2 >= 2 & X$Z_lm_K_Avg_X1 < 2), ]
+X2_Specific_Alleviators <- X[which(X$Z_lm_K_Avg_X2 <= -2 & X$Z_lm_K_Avg_X1 > -2), ]
+Overlap_Aggravators <- X[which(X$Z_lm_K_Avg_X1 >= 2 & X$Z_lm_K_Avg_X2 >= 2), ]
+Overlap_Alleviators <- X[which(X$Z_lm_K_Avg_X1 <= -2 & X$Z_lm_K_Avg_X2 <= -2), ]
+X2_Specific_Aggravators_X1_Alleviatiors <- X[which(X$Z_lm_K_Avg_X2 >= 2 & X$Z_lm_K_Avg_X1 <= -2), ]
+X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_K_Avg_X2 <= -2 & X$Z_lm_K_Avg_X1 >= 2), ]
+X$Overlap_Avg <- NA
+
+try(X[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <-
+  paste(exp1_name, "Specific_Deletion_Suppressors", sep = "_"))
+try(X[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <-
+  paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"))
+try(X[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <-
+  paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"))
+try(X[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <-
+  paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"))
+try(X[X$Term_Avg %in% Overlap_Aggravators$Term_Avg, ]$Overlap_Avg <-
+  "Overlapping_Deletion_Suppressors")
+try(X[X$Term_Avg %in% Overlap_Alleviators$Term_Avg, ]$Overlap_Avg <-
+  "Overlapping_Deletion_Enhancers")
+try(X[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg, ]$Overlap_Avg <-
+  paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_"))
+try(X[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg, ]$Overlap_Avg <-
+  paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"))
+
+plotly_path <- file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap.html"))
+gg <- ggplot(data = X, aes(
+  x = Z_lm_K_Avg_X1,
+  y = Z_lm_K_Avg_X2,
+  color = Overlap_Avg,
+  Term = Term_Avg,
+  Genes = Genes_Avg_X1,
+  NumGenes = NumGenes_Avg_X1,
+  AllPossibleGenes = AllPossibleGenes_Avg_X1,
+  SD_1 = Z_lm_K_SD_X1,
+  SD_2 = Z_lm_K_SD_X2
+)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
+  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
+  geom_point(shape = 3) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
+  theme_Publication_legend_right()
+
+pdf(
+  file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap.pdf")),
+  width = 12,
+  height = 8
+)
+
+gg
+dev.off()
+pgg <- ggplotly(gg)
+
+#2
+fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap.html"))
+htmlwidgets::saveWidget(pgg, fname)
+
+#3
+x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2, ]
+plotly_path <- file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap_above2genes.html"))
+gg <- ggplot(data = x_rem2_gene, aes(
+  x = Z_lm_K_Avg_X1,
+  y = Z_lm_K_Avg_X2,
+  color = Overlap_Avg,
+  Term = Term_Avg,
+  Genes = Genes_Avg_X1,
+  NumGenes = NumGenes_Avg_X1,
+  AllPossibleGenes = AllPossibleGenes_Avg_X1,
+  SD_1 = Z_lm_K_SD_X1,
+  SD_2 = Z_lm_K_SD_X2
+)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
+  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
+  geom_point(shape = 3) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
+  theme_Publication_legend_right()
+
+pdf(
+  file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_above2genes.pdf")),
+  width = 12,
+  height = 8
+)
+
+gg
+dev.off()
+pgg <- ggplotly(gg)
+#pgg
+fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap_above2genes.html"))
+htmlwidgets::saveWidget(pgg, fname)
+
+#4
+X_overlap_nothresold <- X[!(is.na(X$Overlap_Avg)), ]
+gg <- ggplot(data = X_overlap_nothresold, aes(
+  x = Z_lm_K_Avg_X1,
+  y = Z_lm_K_Avg_X2,
+  color = Overlap_Avg,
+  Term = Term_Avg,
+  Genes = Genes_Avg_X1,
+  NumGenes = NumGenes_Avg_X1,
+  AllPossibleGenes = AllPossibleGenes_Avg_X1,
+  SD_1 = Z_lm_K_SD_X1,
+  SD_2 = Z_lm_K_SD_X2
+)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
+  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
+  geom_point(shape = 3) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
+  theme_Publication_legend_right()
+
+pdf(
+  file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_Above2SD_ByOverlap.pdf")),
+  width = 12,
+  height = 8
+)
+
+gg
+dev.off()
+pgg <- ggplotly(gg)
+#pgg
+
+fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_Above2SD_ByOverlap.html"))
+htmlwidgets::saveWidget(pgg, fname)
+
+#5
+# Only output GTA terms where average score is still above 2 after subtracting the SD
+# Z1 will ID aggravators, Z2 alleviators
+Z1 <- X
+Z1$L_Subtract_SD_X1 <- Z1$Z_lm_K_Avg_X1 - Z1$Z_lm_K_SD_X1
+Z1$L_Subtract_SD_X2 <- Z1$Z_lm_K_Avg_X2 - Z1$Z_lm_K_SD_X2
+Z2 <- X
+Z2$L_Subtract_SD_X1 <- Z1$Z_lm_K_Avg_X1 + Z1$Z_lm_K_SD_X1
+Z2$L_Subtract_SD_X2 <- Z1$Z_lm_K_Avg_X2 + Z1$Z_lm_K_SD_X2
+X1_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 < 2), ]
+X1_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X1 <= -2 & Z2$L_Subtract_SD_X2 > -2), ]
+X2_Specific_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z1$L_Subtract_SD_X1 < 2), ]
+X2_Specific_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 > -2), ]
+Overlap_Aggravators2 <- Z1[which(Z1$L_Subtract_SD_X1 >= 2 & Z1$L_Subtract_SD_X2 >= 2), ]
+Overlap_Alleviators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z2$L_Subtract_SD_X1 <= -2), ]
+X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 & Z2$L_Subtract_SD_X1 <= -2), ]
+X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2), ]
+X$Overlap <- NA
+
+try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg, ]$Overlap <-
+  paste(exp1_name, "Specific_Deletion_Suppressors", sep = "_"))
+try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg, ]$Overlap <-
+  paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"))
+try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg, ]$Overlap <-
+  paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"))
+try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg, ]$Overlap <-
+  paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"))
+try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg, ]$Overlap <-
+  "Overlapping_Deletion_Suppressors")
+try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg, ]$Overlap <-
+  "Overlapping_Deletion_Enhancers")
+try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg, ]$Overlap <-
+  paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_"))
+try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg, ]$Overlap <-
+  paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"))
+
+X_abovethreshold <- X[!(is.na(X$Overlap)), ]
+gg <- ggplot(data = X_abovethreshold, aes(
+  x = Z_lm_K_Avg_X1,
+  y = Z_lm_K_Avg_X2,
+  color = Overlap,
+  Term = Term_Avg,
+  Genes = Genes_Avg_X1,
+  NumGenes = NumGenes_Avg_X1,
+  AllPossibleGenes = AllPossibleGenes_Avg_X1,
+  SD_1 = Z_lm_K_SD_X1,
+  SD_2 = Z_lm_K_SD_X2
+)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
+  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
+  geom_point(shape = 3) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
+  theme_Publication_legend_right()
+
+pdf(
+  file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold.pdf")),
+  width = 12,
+  height = 8
+)
+
+gg
+dev.off()
+pgg <- ggplotly(gg)
+#pgg
+fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold.html"))
+htmlwidgets::saveWidget(pgg, fname)
+
+#6
+gg <- ggplot(data = X_abovethreshold, aes(
+  x = Z_lm_K_Avg_X1,
+  y = Z_lm_K_Avg_X2,
+  color = Overlap,
+  Term = Term_Avg,
+  Genes = Genes_Avg_X1,
+  NumGenes = NumGenes_Avg_X1,
+  AllPossibleGenes = AllPossibleGenes_Avg_X1,
+  SD_1 = Z_lm_K_SD_X1,
+  SD_2 = Z_lm_K_SD_X2
+)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
+  geom_text(aes(label = Term_Avg), nudge_y = 0.25, size = 2) +
+  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
+  geom_point(shape = 3, size = 3) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, " vs. ", exp2_name)) +
+  theme_Publication_legend_right()
+
+pdf(
+  file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_names.pdf")),
+  width = 20,
+  height = 20
+)
+gg
+dev.off()
+pgg <- ggplotly(gg)
+#pgg
+fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_names.html"))
+htmlwidgets::saveWidget(pgg, fname)
+
+#7
+X_abovethreshold$X1_Rank <- NA
+X_abovethreshold$X1_Rank <- rank(-X_abovethreshold$Z_lm_K_Avg_X1, ties.method = "random")
+X_abovethreshold$X2_Rank <- NA
+X_abovethreshold$X2_Rank <- rank(-X_abovethreshold$Z_lm_K_Avg_X2, ties.method = "random")
+
+gg <- ggplot(data = X_abovethreshold, aes(
+  x = Z_lm_K_Avg_X1,
+  y = Z_lm_K_Avg_X2,
+  color = Overlap,
+  Term = Term_Avg,
+  Genes = Genes_Avg_X1,
+  NumGenes = NumGenes_Avg_X1,
+  AllPossibleGenes = AllPossibleGenes_Avg_X1,
+  SD_1 = Z_lm_K_SD_X1,
+  SD_2 = Z_lm_K_SD_X2
+)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
+  geom_text(aes(label = X1_Rank), nudge_y = 0.25, size = 4) +
+  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
+  geom_point(shape = 3, size = 3) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
+  theme_Publication_legend_right()
+
+pdf(
+  file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX1.pdf")),
+  width = 15,
+  height = 15
+)
+
+gg
+dev.off()
+pgg <- ggplotly(gg)
+#pgg
+fname <-
+  file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX1.html"))
+htmlwidgets::saveWidget(pgg, fname)
+
+#8
+gg <- ggplot(data = X_abovethreshold, aes(
+  x = Z_lm_K_Avg_X1,
+  y = Z_lm_K_Avg_X2,
+  color = Overlap,
+  Term = Term_Avg,
+  Genes = Genes_Avg_X1,
+  NumGenes = NumGenes_Avg_X1,
+  AllPossibleGenes = AllPossibleGenes_Avg_X1,
+  SD_1 = Z_lm_K_SD_X1,
+  SD_2 = Z_lm_K_SD_X2
+)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
+  geom_text(aes(label = X2_Rank), nudge_y = 0.25, size = 4) +
+  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
+  geom_point(shape = 3, size = 3) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
+  theme_Publication_legend_right()
+
+pdf(
+  file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX2.pdf")),
+  width = 15,
+  height = 15
+)
+
+gg
+dev.off()
+pgg <- ggplotly(gg)
+#pgg
+fname <-
+  file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX2.html"))
+htmlwidgets::saveWidget(pgg, fname)
+
+write.csv(
+  x = X,
+  file = file.path(pairDirK, paste0("All_GTF_Avg_Scores_", exp1_name, "_vs_", exp2_name, ".csv")),
+  row.names = FALSE
+)
+
+write.csv(
+  x = X_abovethreshold,
+  file = file.path(pairDirK, paste0("AboveThreshold_GTF_Avg_Scores_", exp1_name, "_vs_", exp2_name, ".csv")),
+  row.names = FALSE
+)
--- a/qhtcp-workflow/apps/r/SGD_features.tab
+++ b/qhtcp-workflow/apps/r/SGD_features.tab
--- a/qhtcp-workflow/apps/r/TSHeatmaps5dev2.R
+++ b/qhtcp-workflow/apps/r/TSHeatmaps5dev2.R
@@ -0,0 +1,810 @@
+#!/usr/bin/env Rscript
+# Makes heat maps of multiple experiments
+#
+# Updated 240724 Bryan C Roessler to improve file operations and portability
+# I tried to leave as much logic intact as possible, just feeding in vars in a better way
+# NOTE: The script now has 7 required arguments and a variable number of input experiments
+# @arg $1 string StudyInfo.csv file
+# @arg $2 string gene_ontology_edit.obo file
+# @arg $3 string go_terms.tab file
+# @arg $4 string All_SGD_GOTerms_for_QHTCPtk.csv
+# @arg $5 string base directory
+# @arg $6 string output directory
+
+library("ontologyIndex")
+library("ggplot2")
+library("RColorBrewer")
+library("grid")
+library("ggthemes")
+# library("plotly")
+# library("htmlwidgets")
+library("extrafont")
+library("stringr")
+library("org.Sc.sgd.db")
+library("ggrepel")
+library("gplots")
+
+# Load arguments
+args <- commandArgs(TRUE)
+study_info_file <- args[1]
+ontology_file <- args[2]
+sgd_terms_tfile <- args[3]
+all_sgd_terms_csv <- args[4]
+base_dir <- args[5]
+output_dir <- args[6]
+study_nums <- args[7:length(args)]
+
+# Import standard tables used in Sean's code That should be copied to each ExpStudy
+labels <- read.csv(file = study_info_file, stringsAsFactors = FALSE)
+Ontology <- get_ontology(file = ontology_file, propagate_relationships = "is_a", extract_tags = "minimal")
+GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS) # all ORFs associated with GO term
+Terms <- read.delim(file = sgd_terms_tfile,
+  header = FALSE,
+  quote = "",
+  col.names = c("GO_ID", "GO_Term", "GO_Aspect", "GO_Term_Definition")
+)
+XX3 <- read.csv(file = all_sgd_terms_csv, stringsAsFactors = FALSE, header = TRUE)
+XX3[, 1] <- paste0("GO:", formatC(XX3[, 1], width = 7, flag = "0"))
+XX3[, 2] <- gsub(pattern = " ", replacement = "_", x = XX3[, 2])
+XX3[, 2] <- gsub(pattern = "/", replacement = "_", x = XX3[, 2])
+
+# Load input files
+for (study_num in study_nums) {
+  input_file <- file.path(base_dir, paste("Exp", study_num), zscores, "zscores_interaction.csv")
+  if (file.exists(input_file)) {
+    assign(paste(X, study_num), read.csv(file = input_file, stringsAsFactors = FALSE, header = TRUE))
+    assign(paste(Name, study_num), labels[study_num, 2])
+  }
+}
+
+for (study_num in study_nums) {
+  eval(paste("function", study_num))
+}
+
+if (length(study_nums) > 0) {
+  X1$ORF <- X1$OrfRep
+  X1$ORF <- gsub("_1", "", x = X1$ORF)
+  X1$ORF <- gsub("_2", "", x = X1$ORF)
+  X1$ORF <- gsub("_3", "", x = X1$ORF)
+  X1$ORF <- gsub("_4", "", x = X1$ORF)
+  
+  X1$Score_L <- "No Effect"
+  try(X1[is.na(X1$Z_lm_L), ]$Score_L <- "No Growth")
+  try(X1[!is.na(X1$Z_lm_L) & X1$Z_lm_L >= 2, ]$Score_L <- "Deletion Enhancer")
+  try(X1[!is.na(X1$Z_lm_L) & X1$Z_lm_L <= -2, ]$Score_L <- "Deletion Suppressor")
+  
+  X1$Score_K <- "No Effect"
+  try(X1[is.na(X1$Z_lm_K), ]$Score_K <- "No Growth")
+  try(X1[!is.na(X1$Z_lm_K) & X1$Z_lm_K >= 2, ]$Score_K <- "Deletion Suppressor")
+  try(X1[!is.na(X1$Z_lm_K) & X1$Z_lm_K <= -2, ]$Score_K <- "Deletion Enhancer")
+  
+  # Express the na data as 0.001 in X1 for K and L
+  X1[is.na(X1$Z_lm_L), ]$Z_lm_L <- 0.001
+  X1[is.na(X1$Z_lm_K), ]$Z_lm_K <- 0.001
+  X1$Rank_L <- rank(X1$Z_lm_L)
+  X1$Rank_K <- rank(X1$Z_lm_K)
+  X1 <- X1[order(X1$OrfRep, decreasing = FALSE), ]
+  colnames(X1) <- paste0(colnames(X1), "_X1")
+}
+
+if (length(study_nums) > 1) {
+  X2$ORF <- X2$OrfRep
+  X2$ORF <- gsub("_1", "", x = X2$ORF)
+  X2$ORF <- gsub("_2", "", x = X2$ORF)
+  X2$ORF <- gsub("_3", "", x = X2$ORF)
+  X2$ORF <- gsub("_4", "", x = X2$ORF)
+  
+  X2$Score_L <- "No Effect"
+  try(X2[is.na(X2$Z_lm_L), ]$Score_L <- "No Growth")
+  try(X2[!is.na(X2$Z_lm_L) & X2$Z_lm_L >= 2, ]$Score_L <- "Deletion Enhancer")
+  try(X2[!is.na(X2$Z_lm_L) & X2$Z_lm_L <= -2, ]$Score_L <- "Deletion Suppressor")
+  
+  X2$Score_K <- "No Effect"
+  try(X2[is.na(X2$Z_lm_K), ]$Score_K <- "No Growth")
+  try(X2[!is.na(X2$Z_lm_K) & X2$Z_lm_K >= 2, ]$Score_K <- "Deletion Suppressor")
+  try(X2[!is.na(X2$Z_lm_K) & X2$Z_lm_K <= -2, ]$Score_K <- "Deletion Enhancer")
+  #express the na data as 0.001 in X2
+  X2[is.na(X2$Z_lm_L), ]$Z_lm_L <- 0.001
+  X2[is.na(X2$Z_lm_K), ]$Z_lm_K <- 0.001
+  
+  X2$Rank_L <- rank(X2$Z_lm_L)
+  X2$Rank_K <- rank(X2$Z_lm_K)
+  
+  X2 <- X2[order(X2$OrfRep, decreasing = FALSE), ]
+  colnames(X2) <- paste0(colnames(X2), "_X2")
+  X <- cbind(X1, X2)
+}
+
+if (length(study_nums) > 2) {
+  X3$ORF <- X3$OrfRep
+  X3$ORF <- gsub("_1", "", x = X3$ORF)
+  X3$ORF <- gsub("_2", "", x = X3$ORF)
+  X3$ORF <- gsub("_3", "", x = X3$ORF)
+  X3$ORF <- gsub("_4", "", x = X3$ORF)
+  
+  X3$Score_L <- "No Effect"
+  try(X3[is.na(X3$Z_lm_L), ]$Score_L <- "No Growth")
+  try(X3[!is.na(X3$Z_lm_L) & X3$Z_lm_L >= 2, ]$Score_L <- "Deletion Enhancer")
+  try(X3[!is.na(X3$Z_lm_L) & X3$Z_lm_L <= -2, ]$Score_L <- "Deletion Suppressor")
+  
+  X3$Score_K <- "No Effect"
+  try(X3[is.na(X3$Z_lm_K), ]$Score_K <- "No Growth")
+  try(X3[!is.na(X3$Z_lm_K) & X3$Z_lm_K >= 2, ]$Score_K <- "Deletion Suppressor")
+  try(X3[!is.na(X3$Z_lm_K) & X3$Z_lm_K <= -2, ]$Score_K <- "Deletion Enhancer")
+  
+  # Express the na data as 0.001 in X3
+  X3[is.na(X3$Z_lm_L), ]$Z_lm_L <- 0.001
+  X3[is.na(X3$Z_lm_K), ]$Z_lm_K <- 0.001
+  X3$Rank_L <- rank(X3$Z_lm_L)
+  X3$Rank_K <- rank(X3$Z_lm_K)
+  X3 <- X3[order(X3$OrfRep, decreasing = FALSE), ]
+  colnames(X3) <- paste0(colnames(X3), "_X3")
+  X <- cbind(X, X3)
+}
+
+if (length(study_nums) > 3) {
+  X4$ORF <- X4$OrfRep
+  X4$ORF <- gsub("_1", "", x = X4$ORF)
+  X4$ORF <- gsub("_2", "", x = X4$ORF)
+  X4$ORF <- gsub("_3", "", x = X4$ORF)
+  X4$ORF <- gsub("_4", "", x = X4$ORF)
+  
+  X4$Score_L <- "No Effect"
+  try(X4[is.na(X4$Z_lm_L), ]$Score_L <- "No Growth")
+  try(X4[!is.na(X4$Z_lm_L) & X4$Z_lm_L >= 2, ]$Score_L <- "Deletion Enhancer")
+  try(X4[!is.na(X4$Z_lm_L) & X4$Z_lm_L <= -2, ]$Score_L <- "Deletion Suppressor")
+  
+  X4$Score_K <- "No Effect"
+  try(X4[is.na(X4$Z_lm_K), ]$Score_K <- "No Growth")
+  try(X4[!is.na(X4$Z_lm_K) & X4$Z_lm_K >= 2, ]$Score_K <- "Deletion Suppressor")
+  try(X4[!is.na(X4$Z_lm_K) & X4$Z_lm_K <= -2, ]$Score_K <- "Deletion Enhancer")
+  
+  # Express the na data as 0.001 in X4
+  X4[is.na(X4$Z_lm_L), ]$Z_lm_L <- 0.001
+  X4[is.na(X4$Z_lm_K), ]$Z_lm_K <- 0.001
+  X4$Rank_L <- rank(X4$Z_lm_L)
+  X4$Rank_K <- rank(X4$Z_lm_K)
+  X4 <- X4[order(X4$OrfRep, decreasing = FALSE), ]
+  colnames(X4) <- paste0(colnames(X4), "_X4")
+  X <- cbind(X, X4)
+}
+
+if (length(study_nums) > 4) {
+  X5$ORF <- X5$OrfRep
+  X5$ORF <- gsub("_1", "", x = X5$ORF)
+  X5$ORF <- gsub("_2", "", x = X5$ORF)
+  X5$ORF <- gsub("_3", "", x = X5$ORF)
+  X5$ORF <- gsub("_4", "", x = X5$ORF)
+  
+  X5$Score_L <- "No Effect"
+  try(X5[is.na(X5$Z_lm_L), ]$Score_L <- "No Growth")
+  try(X5[!is.na(X5$Z_lm_L) & X5$Z_lm_L >= 2, ]$Score_L <- "Deletion Enhancer")
+  try(X5[!is.na(X5$Z_lm_L) & X5$Z_lm_L <= -2, ]$Score_L <- "Deletion Suppressor")
+  
+  X5$Score_K <- "No Effect"
+  try(X5[is.na(X5$Z_lm_K), ]$Score_K <- "No Growth")
+  try(X5[!is.na(X5$Z_lm_K) & X5$Z_lm_K >= 2, ]$Score_K <- "Deletion Suppressor")
+  try(X5[!is.na(X5$Z_lm_K) & X5$Z_lm_K <= -2, ]$Score_K <- "Deletion Enhancer")
+  
+  # Express the na data as 0.001 in X5
+  X5[is.na(X5$Z_lm_L), ]$Z_lm_L <- 0.001
+  X5[is.na(X5$Z_lm_K), ]$Z_lm_K <- 0.001
+  X5$Rank_L <- rank(X5$Z_lm_L)
+  X5$Rank_K <- rank(X5$Z_lm_K)
+  X5 <- X5[order(X5$OrfRep, decreasing = FALSE), ]
+  colnames(X5) <- paste0(colnames(X5), "_X5")
+  X <- cbind(X, X5)
+}
+
+X$ORF <- X$OrfRep_X1
+
+if (length(study_nums) > 1) {
+  X$ORF <- gsub("_1", "", x = X$ORF)
+  try(X[X$Gene_X1 == "", ]$Gene_X1 <- X[X$Gene_X1 == "", ]$OrfRep_X1)
+  try(X[X$Gene_X2 == "", ]$Gene_X2 <- X[X$Gene_X2 == "", ]$OrfRep_X2)
+  X_heatmap <-
+    X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
+      colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
+      colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
+      colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
+      colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2"]
+  
+  X_heatmap <- X_heatmap[, c(10, 1, 4, 5, 8, 9, 2, 3, 6, 7)]
+  colnames(X_heatmap) <- gsub(pattern = "X1", replacement = Name1, colnames(X_heatmap))
+  colnames(X_heatmap) <- gsub(pattern = "X2", replacement = Name2, colnames(X_heatmap))
+  colnames(X_heatmap)[2] <- "Gene"
+}
+
+if (length(study_nums) > 2) {
+  X$ORF <- gsub("_1", "", x = X$ORF)
+  X$ORF <- gsub("_2", "", x = X$ORF)
+  try(X[X$Gene_X1 == "", ]$Gene_X1 <- X[X$Gene_X1 == "", ]$OrfRep_X1)
+  try(X[X$Gene_X2 == "", ]$Gene_X2 <- X[X$Gene_X2 == "", ]$OrfRep_X2)
+  try(X[X$Gene_X3 == "", ]$Gene_X3 <- X[X$Gene_X3 == "", ]$OrfRep_X3)
+  
+  X_heatmap <-
+    X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
+      colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
+      colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
+      colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
+      colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
+      colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
+      colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3"]
+
+  # Reorder columns
+  X_heatmap <- X_heatmap[, c(14, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11)]
+  colnames(X_heatmap) <- gsub(pattern = "X1", replacement = Name1, colnames(X_heatmap))
+  colnames(X_heatmap) <- gsub(pattern = "X2", replacement = Name2, colnames(X_heatmap))
+  colnames(X_heatmap) <- gsub(pattern = "X3", replacement = Name3, colnames(X_heatmap))
+  colnames(X_heatmap)[2] <- "Gene"
+}
+
+if (length(study_nums) > 3) {
+  X$ORF <- gsub("_1", "", x = X$ORF)
+  X$ORF <- gsub("_2", "", x = X$ORF)
+  X$ORF <- gsub("_3", "", x = X$ORF)
+  try(X[X$Gene_X1 == "", ]$Gene_X1 <- X[X$Gene_X1 == "", ]$OrfRep_X1)
+  try(X[X$Gene_X2 == "", ]$Gene_X2 <- X[X$Gene_X2 == "", ]$OrfRep_X2)
+  try(X[X$Gene_X3 == "", ]$Gene_X3 <- X[X$Gene_X3 == "", ]$OrfRep_X3)
+  try(X[X$Gene_X4 == "", ]$Gene_X4 <- X[X$Gene_X4 == "", ]$OrfRep_X4)
+  
+  X_heatmap <-
+    X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
+      colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
+      colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
+      colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
+      colnames(X) == "Z_Shift_K_X4" | colnames(X) == "Z_lm_K_X4" |
+      colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
+      colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
+      colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" |
+      colnames(X) == "Z_Shift_L_X4" | colnames(X) == "Z_lm_L_X4"]
+
+  # Reorder columns
+  X_heatmap <- X_heatmap[, c(18, 1, 4, 5, 8, 9, 12, 13, 16, 17, 2, 3, 6, 7, 10, 11, 14, 15)]
+  colnames(X_heatmap) <- gsub(pattern = "X1", replacement = Name1, colnames(X_heatmap))
+  colnames(X_heatmap) <- gsub(pattern = "X2", replacement = Name2, colnames(X_heatmap))
+  colnames(X_heatmap) <- gsub(pattern = "X3", replacement = Name3, colnames(X_heatmap))
+  colnames(X_heatmap) <- gsub(pattern = "X4", replacement = Name4, colnames(X_heatmap))
+  colnames(X_heatmap)[2] <- "Gene"
+}
+
+if (length(study_nums) > 4) {
+  X$ORF <- gsub("_1", "", x = X$ORF)
+  X$ORF <- gsub("_2", "", x = X$ORF)
+  X$ORF <- gsub("_3", "", x = X$ORF)
+  X$ORF <- gsub("_4", "", x = X$ORF)
+  try(X[X$Gene_X1 == "", ]$Gene_X1 <- X[X$Gene_X1 == "", ]$OrfRep_X1)
+  try(X[X$Gene_X2 == "", ]$Gene_X2 <- X[X$Gene_X2 == "", ]$OrfRep_X2)
+  try(X[X$Gene_X3 == "", ]$Gene_X3 <- X[X$Gene_X3 == "", ]$OrfRep_X3)
+  try(X[X$Gene_X4 == "", ]$Gene_X4 <- X[X$Gene_X4 == "", ]$OrfRep_X4)
+  try(X[X$Gene_X5 == "", ]$Gene_X5 <- X[X$Gene_X5 == "", ]$OrfRep_X5)
+  
+  X_heatmap <-
+    X[colnames(X) == "ORF" | colnames(X) == "Gene_X1" |
+      colnames(X) == "Z_Shift_K_X1" | colnames(X) == "Z_lm_K_X1" |
+      colnames(X) == "Z_Shift_K_X2" | colnames(X) == "Z_lm_K_X2" |
+      colnames(X) == "Z_Shift_K_X3" | colnames(X) == "Z_lm_K_X3" |
+      colnames(X) == "Z_Shift_K_X4" | colnames(X) == "Z_lm_K_X4" |
+      colnames(X) == "Z_Shift_K_X5" | colnames(X) == "Z_lm_K_X5" |
+      colnames(X) == "Z_Shift_L_X1" | colnames(X) == "Z_lm_L_X1" |
+      colnames(X) == "Z_Shift_L_X2" | colnames(X) == "Z_lm_L_X2" |
+      colnames(X) == "Z_Shift_L_X3" | colnames(X) == "Z_lm_L_X3" |
+      colnames(X) == "Z_Shift_L_X4" | colnames(X) == "Z_lm_L_X4" |
+      colnames(X) == "Z_Shift_L_X5" | colnames(X) == "Z_lm_L_X5"]
+
+  # Reorder columns
+  X_heatmap <- X_heatmap[, c(22, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 2, 3, 6, 7, 10, 11, 14, 15, 18, 19)]
+  colnames(X_heatmap) <- gsub(pattern = "X1", replacement = Name1, colnames(X_heatmap))
+  colnames(X_heatmap) <- gsub(pattern = "X2", replacement = Name2, colnames(X_heatmap))
+  colnames(X_heatmap) <- gsub(pattern = "X3", replacement = Name3, colnames(X_heatmap))
+  colnames(X_heatmap) <- gsub(pattern = "X4", replacement = Name4, colnames(X_heatmap))
+  colnames(X_heatmap) <- gsub(pattern = "X5", replacement = Name5, colnames(X_heatmap))
+  colnames(X_heatmap)[2] <- "Gene"
+}
+
+# Theme elements for plots
+theme_Publication <- function(base_size = 14, base_family = "sans") {
+  (theme_foundation(base_size = base_size, base_family = base_family) +
+    theme(
+      plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
+      text = element_text(),
+      panel.background = element_rect(colour = NA),
+      plot.background = element_rect(colour = NA),
+      panel.border = element_rect(colour = NA),
+      axis.title = element_text(face = "bold", size = rel(1)),
+      axis.title.y = element_text(angle = 90, vjust = 2),
+      axis.title.x = element_text(vjust = -0.2),
+      axis.text = element_text(),
+      axis.line = element_line(colour = "black"),
+      axis.ticks = element_line(),
+      panel.grid.major = element_line(colour = "#f0f0f0"),
+      panel.grid.minor = element_blank(),
+      legend.key = element_rect(colour = NA),
+      legend.position = "bottom",
+      legend.direction = "horizontal",
+      legend.key.size = unit(0.2, "cm"),
+      legend.spacing = unit(0, "cm"),
+      legend.title = element_text(face = "italic"),
+      plot.margin = unit(c(10, 5, 5, 5), "mm"),
+      strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
+      strip.text = element_text(face = "bold")
+    )
+  )
+}
+
+scale_fill_Publication <- function(...) {
+  library(scales)
+  discrete_scale(
+    "fill",
+    "Publication",
+    manual_pal(values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")),
+    ...
+  )
+}
+
+scale_colour_Publication <- function(...) {
+  discrete_scale(
+    "colour",
+    "Publication",
+    manual_pal(values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")),
+    ...
+  )
+}
+
+theme_Publication_legend_right <- function(base_size = 14, base_family = "sans") {
+  (theme_foundation(base_size = base_size, base_family = base_family) +
+    theme(
+      plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
+      text = element_text(),
+      panel.background = element_rect(colour = NA),
+      plot.background = element_rect(colour = NA),
+      panel.border = element_rect(colour = NA),
+      axis.title = element_text(face = "bold", size = rel(1)),
+      axis.title.y = element_text(angle = 90, vjust = 2),
+      axis.title.x = element_text(vjust = -0.2),
+      axis.text = element_text(),
+      axis.line = element_line(colour = "black"),
+      axis.ticks = element_line(),
+      panel.grid.major = element_line(colour = "#f0f0f0"),
+      panel.grid.minor = element_blank(),
+      legend.key = element_rect(colour = NA),
+      legend.position = "right",
+      legend.direction = "vertical",
+      legend.key.size = unit(0.5, "cm"),
+      legend.spacing = unit(0, "cm"),
+      legend.title = element_text(face = "italic"),
+      plot.margin = unit(c(10, 5, 5, 5), "mm"),
+      strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
+      strip.text = element_text(face = "bold")
+    )
+  )
+}
+
+scale_fill_Publication <- function(...) {
+  discrete_scale(
+    "fill",
+    "Publication",
+    manual_pal(values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")),
+    ...
+  )
+}
+
+scale_colour_Publication <- function(...) {
+  discrete_scale(
+    "colour",
+    "Publication",
+    manual_pal(values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506", "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")),
+    ...
+  )
+}
+
+Ontology <- get_ontology(file = ontology_file, propagate_relationships = "is_a", extract_tags = "minimal")
+print(Ontology)
+
+# All ORFs associated with GO term
+GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)
+
+# Terms is the GO term list jwr moved up to TAABLES
+Terms <- read.delim(file = sgd_terms_tfile,
+  header = FALSE,
+  quote = "",
+  col.names = c("GO_ID", "GO_Term", "GO_Aspect", "GO_Term_Definition")
+)
+
+colormapbreaks <- c(-12, -10, -8, -6, -4, -2, 2, 4, 6, 8, 10, 12)
+
+for (s in 1:dim(XX3)[1]) {
+  # Ontology <-
+  #   get_ontology(file = "Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo",
+  #     propagate_relationships = "is_a", extract_tags = "minimal")
+  # Ontology_Everything <-
+  #   get_ontology(file = "Documents/Hartman_Lab/SGD_Downloads/gene_ontology_edit.obo",
+  #     propagate_relationships = "is_a", extract_tags = "everything")
+  #
+  # GO_ID_Arg <- "GO:0006325"
+  GO_ID_Arg_loop <- as.character(XX3[s, 1])
+  GOTerm_parent <- get_descendants(Ontology, roots = GO_ID_Arg_loop)
+  # GOTerm_parent <- get_descendants(Ontology,roots = "GO:0006325")
+  # Only make plots if parent term has fewer than 500 children
+  Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]]))
+  
+  if (length(GOTerm_parent) > 100) {
+    next()
+  }
+  
+  Parent_Size <- length(as.vector(GO2ALLORFs[GO_ID_Arg_loop][[1]])) # TODO why twice?
+
+  if (Parent_Size < 2) {
+    next()
+  }
+
+  if (Parent_Size > 2000) {
+
+    pdf(
+      file = file.path(output_dir, paste0(XX3[s, 2], ".pdf")),
+      width = 12,
+      height = 45,
+      onefile = TRUE
+    )
+
+    for (i in 1:length(GOTerm_parent)) {
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num, ]$GO_Term)
+      # Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term, ]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term, ]
+      X0 <- as.matrix(Genes_Annotated_to_Term[, 3:dim(Genes_Annotated_to_Term)[2]])
+      if (dim(Genes_Annotated_to_Term)[1] > 2) {
+        try(heatmap.2(
+          x = X0,
+          Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
+          dendrogram = "row", cexCol = 0.7, cexRow = 0.5, scale = "none",
+          breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1,
+          ylab = "Gene",
+          cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE,
+          keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
+          na.color = "red", col = brewer.pal(11, "PuOr"),
+          main = GO_Term_Name,
+          # ColSideColors = ev_repeat,
+          labRow = as.character(Genes_Annotated_to_Term$Gene)
+        ))
+      }
+    }
+    dev.off()
+  }
+  
+  if (Parent_Size >= 1000 && Parent_Size <= 2000) {
+
+    pdf(
+      file = file.path(output_dir, paste0(XX3[s, 2], ".pdf")),
+      width = 12,
+      height = 35,
+      onefile = TRUE
+    )
+
+    for (i in 1:length(GOTerm_parent)) {
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num, ]$GO_Term)
+      # Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term, ]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term, ]
+      X0 <- as.matrix(Genes_Annotated_to_Term[, 3:dim(Genes_Annotated_to_Term)[2]])
+      if (dim(Genes_Annotated_to_Term)[1] > 2) {
+        try(heatmap.2(
+          x = X0,
+          Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
+          dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none",
+          breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1,
+          ylab = "Gene",
+          cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE,
+          keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
+          na.color = "red", col = brewer.pal(11, "PuOr"),
+          main = GO_Term_Name,
+          # ColSideColors = ev_repeat,
+          labRow = as.character(Genes_Annotated_to_Term$Gene)
+        ))
+      }
+    }
+    dev.off()
+  }
+
+  if (Parent_Size >= 500 && Parent_Size <= 1000) {
+    
+    pdf(
+      file = file.path(output_dir, paste0(XX3[s, 2], ".pdf")),
+      width = 12,
+      height = 30,
+      onefile = TRUE
+    )
+
+    for (i in 1:length(GOTerm_parent)) {
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num, ]$GO_Term)
+      # Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term, ]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term, ]
+      X0 <- as.matrix(Genes_Annotated_to_Term[, 3:dim(Genes_Annotated_to_Term)[2]])
+      if (dim(Genes_Annotated_to_Term)[1] > 2) {
+        try(heatmap.2(
+          x = X0,
+          Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
+          dendrogram = "row", cexCol = 0.7, cexRow = 0.6, scale = "none",
+          breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1,
+          ylab = "Gene",
+          cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE,
+          keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
+          na.color = "red", col = brewer.pal(11, "PuOr"),
+          main = GO_Term_Name,
+          # ColSideColors = ev_repeat,
+          labRow = as.character(Genes_Annotated_to_Term$Gene)
+        ))
+      }
+    }
+    dev.off()
+  }
+
+  if (Parent_Size >= 200 && Parent_Size <= 500) {
+    
+    pdf(
+      file = file.path(output_dir, paste0(XX3[s, 2], ".pdf")),
+      width = 12,
+      height = 25,
+      onefile = TRUE
+    )
+
+    for (i in 1:length(GOTerm_parent)) {
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num, ]$GO_Term)
+      # Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term, ]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term, ]
+      X0 <- as.matrix(Genes_Annotated_to_Term[, 3:dim(Genes_Annotated_to_Term)[2]])
+      if (dim(Genes_Annotated_to_Term)[1] > 2) {
+        try(heatmap.2(
+          x = X0,
+          Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
+          dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
+          breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1,
+          ylab = "Gene",
+          cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE,
+          keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
+          na.color = "red", col = brewer.pal(11, "PuOr"),
+          main = GO_Term_Name,
+          # ColSideColors = ev_repeat,
+          labRow = as.character(Genes_Annotated_to_Term$Gene)
+        ))
+      }
+    }
+    dev.off()
+  }
+
+  if (Parent_Size >= 100 && Parent_Size <= 200) {
+    
+    pdf(
+      file = file.path(output_dir, paste0(XX3[s, 2], ".pdf")),
+      width = 12,
+      height = 20,
+      onefile = TRUE
+    )
+    
+    for (i in 1:length(GOTerm_parent)) {
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num, ]$GO_Term)
+      # Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term, ]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term, ]
+      X0 <- as.matrix(Genes_Annotated_to_Term[, 3:dim(Genes_Annotated_to_Term)[2]])
+      if (dim(Genes_Annotated_to_Term)[1] > 2) {
+        try(heatmap.2(
+          x = X0,
+          Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
+          dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
+          breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1,
+          ylab = "Gene",
+          cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE,
+          keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
+          na.color = "red", col = brewer.pal(11, "PuOr"),
+          main = GO_Term_Name,
+          # ColSideColors = ev_repeat,
+          labRow = as.character(Genes_Annotated_to_Term$Gene)
+        ))
+      }
+    }
+    dev.off()
+  }
+
+  if (Parent_Size >= 60 && Parent_Size <= 100) {
+    
+    pdf(
+      file = file.path(output_dir, paste0(XX3[s, 2], ".pdf")),
+      width = 12,
+      height = 15,
+      onefile = TRUE
+    )
+    
+    for (i in 1:length(GOTerm_parent)) {
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num, ]$GO_Term)
+      # Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term, ]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term, ]
+      X0 <- as.matrix(Genes_Annotated_to_Term[, 3:dim(Genes_Annotated_to_Term)[2]])
+      if (dim(Genes_Annotated_to_Term)[1] > 2) {
+        try(heatmap.2(
+          x = X0,
+          Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
+          dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
+          breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1,
+          ylab = "Gene",
+          cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE,
+          keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
+          na.color = "red", col = brewer.pal(11, "PuOr"),
+          main = GO_Term_Name,
+          # ColSideColors = ev_repeat,
+          labRow = as.character(Genes_Annotated_to_Term$Gene)
+        ))
+      }
+    }
+    dev.off()
+  }
+
+  if (Parent_Size >= 30 && Parent_Size <= 60) {
+    
+    pdf(
+      file = file.path(output_dir, paste0(XX3[s, 2], ".pdf")),
+      width = 12,
+      height = 10,
+      onefile = TRUE
+    )
+    
+    for (i in 1:length(GOTerm_parent)) {
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num, ]$GO_Term)
+      # Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term, ]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term, ]
+      X0 <- as.matrix(Genes_Annotated_to_Term[, 3:dim(Genes_Annotated_to_Term)[2]])
+      if (dim(Genes_Annotated_to_Term)[1] > 2) {
+        try(heatmap.2(
+          x = X0,
+          Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
+          dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
+          breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1,
+          ylab = "Gene",
+          cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE,
+          keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
+          na.color = "red", col = brewer.pal(11, "PuOr"),
+          main = GO_Term_Name,
+          # ColSideColors = ev_repeat,
+          labRow = as.character(Genes_Annotated_to_Term$Gene)
+        ))
+      }
+      if (dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0) {
+        try(heatmap.2(
+          x = X0,
+          Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
+          dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
+          breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1,
+          ylab = "Gene",
+          cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE,
+          keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
+          na.color = "red", col = brewer.pal(11, "PuOr"),
+          main = GO_Term_Name,
+          # ColSideColors = ev_repeat,
+          labRow = as.character(Genes_Annotated_to_Term$Gene)
+        ))
+      }
+    }
+    
+    dev.off()
+  }
+
+  if (Parent_Size >= 3 && Parent_Size <= 30) {
+    
+    pdf(
+      file = file.path(output_dir, paste0(XX3[s, 2], ".pdf")),
+      width = 12,
+      height = 7,
+      onefile = TRUE
+    )
+    
+    for (i in 1:length(GOTerm_parent)) {
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num, ]$GO_Term)
+      # Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term, ]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term, ]
+      X0 <- as.matrix(Genes_Annotated_to_Term[, 3:dim(Genes_Annotated_to_Term)[2]])
+      if (dim(Genes_Annotated_to_Term)[1] > 2) {
+        try(heatmap.2(
+          x = X0,
+          Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
+          dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
+          breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1,
+          ylab = "Gene",
+          cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE,
+          keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
+          na.color = "red", col = brewer.pal(11, "PuOr"),
+          main = GO_Term_Name,
+          # ColSideColors = ev_repeat,
+          labRow = as.character(Genes_Annotated_to_Term$Gene)
+        ))
+      }
+      if (dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0) {
+        try(heatmap.2(
+          x = X0,
+          Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
+          dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
+          breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1,
+          ylab = "Gene",
+          cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE,
+          keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
+          na.color = "red", col = brewer.pal(11, "PuOr"),
+          main = GO_Term_Name,
+          # ColSideColors = ev_repeat,
+          labRow = as.character(Genes_Annotated_to_Term$Gene)
+        ))
+      }
+    }
+    dev.off()
+  }
+
+  if (Parent_Size == 2) {
+    
+    pdf(
+      file = file.path(output_dir, paste0(XX3[s, 2], ".pdf")),
+      width = 12,
+      height = 7,
+      onefile = TRUE
+    )
+    
+    for (i in 1:length(GOTerm_parent)) {
+      GO_Term <- GOTerm_parent[i]
+      GO_Term_Num <- as.integer(str_split_fixed(as.character(GO_Term), "\\:", 2)[, 2])
+      GO_Term_Name <- as.character(Terms[Terms$GO_ID == GO_Term_Num, ]$GO_Term)
+      # Genes_Annotated_to_Term <- Gene_Association[Gene_Association$GO_ID == GO_Term, ]
+      All_Genes_Annotated_to_Term <- as.vector(GO2ALLORFs[GO_Term][[1]])
+      Genes_Annotated_to_Term <- X_heatmap[X_heatmap$ORF %in% All_Genes_Annotated_to_Term, ]
+      X0 <- as.matrix(Genes_Annotated_to_Term[, 3:dim(Genes_Annotated_to_Term)[2]])
+      if (dim(Genes_Annotated_to_Term)[1] > 2) {
+        try(heatmap.2(
+          x = X0,
+          Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
+          dendrogram = "row", cexCol = 0.7, cexRow = 0.7, scale = "none",
+          breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1,
+          ylab = "Gene",
+          cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE,
+          keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
+          na.color = "red", col = brewer.pal(11, "PuOr"),
+          main = GO_Term_Name,
+          # ColSideColors = ev_repeat,
+          labRow = as.character(Genes_Annotated_to_Term$Gene)
+        ))
+      }
+      if (dim(Genes_Annotated_to_Term)[1] <= 2 && dim(Genes_Annotated_to_Term)[1] > 0) {
+        try(heatmap.2(
+          x = X0,
+          Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
+          dendrogram = "none", cexCol = 0.7, cexRow = 0.7, scale = "none",
+          breaks = colormapbreaks, symbreaks = FALSE, colsep = c(2, 4, 6), sepcolor = "white", offsetCol = 0.1,
+          ylab = "Gene",
+          cellnote = round(X0, digits = 0), notecex = 0.5, key = TRUE,
+          keysize = 0.5, trace = "none", density.info = c("none"), margins = c(10, 8),
+          na.color = "red", col = brewer.pal(11, "PuOr"),
+          main = GO_Term_Name,
+          # ColSideColors = ev_repeat,
+          labRow = as.character(Genes_Annotated_to_Term$Gene)
+        ))
+      }
+    }
+    dev.off()
+  }
+}
--- a/qhtcp-workflow/apps/r/Yeast_Human_Homology_Mapping_biomaRt_18_0920.csv
+++ b/qhtcp-workflow/apps/r/Yeast_Human_Homology_Mapping_biomaRt_18_0920.csv
--- a/qhtcp-workflow/apps/r/addShiftVals.R
+++ b/qhtcp-workflow/apps/r/addShiftVals.R
@@ -0,0 +1,90 @@
+#!/usr/bin/env Rscript
+# This script will add the shift data to the finalTable.csv file
+#
+# May want to reorder columns in excel before making heatmaps - otherwise all the shift data will be plotted next to each other.
+
+library(plyr)
+library(dplyr)
+library(sos)
+
+args <- commandArgs(TRUE)
+
+if (length(args) >= 1) {
+  finalTable <- file.path(args[1])
+} else {
+  finalTable <- "REMcRdy_lm_only.csv-finalTable.csv"  # for legacy workflow
+}
+
+if (length(args) >= 2) {
+  shiftFile <- file.path(args[2])
+} else {
+  shiftFile <- "Shift_only.csv"  # for legacy workflow
+}
+
+if (length(args) >= 3) {
+  studyInfo <- file.path(args[3])
+} else {
+  studyInfo <- "../Code/StudyInfo.csv" # for legacy workflow
+}
+
+if (length(args) >= 4) {
+  output <- file.path(args[4])
+} else {
+  output <- "REMcHeatmaps/REMcWithShift.csv" # for legacy workflow
+}
+
+# Read in the REMc finalTable data
+X <- data.frame(read.csv(file = finalTable, header = TRUE, stringsAsFactors = FALSE))
+# Read in the shift data From ../JoinInteractions
+Y <- data.frame(read.csv(file = shiftFile, header = TRUE, stringsAsFactors = FALSE))
+Labels <- read.delim(studyInfo, skip = 0, as.is = TRUE, row.names = 1, strip.white = TRUE)
+
+# Determine the number of cols - needed to create the correct number of new cols
+Xcolnum <- length(X[1, ])
+ADDnum <- Xcolnum + length(Y[1, ]) - 2
+
+# Create new columns filled with NAs to be filled with data
+Xtemp <- X
+Xtemp[, (Xcolnum + 1):ADDnum] <- NA
+
+# Match the orf names in each row to a orf name in the shift data file and then add the shift data to the finalTable file
+shiftTbl < - as.data.frame(matrix(nrow = 1, ncol = length(Y) - 2)) #the df shiftTbl must be initialized before for loop
+
+for (i in 1:length(X[, 1])) {
+  Shiftrownum <- match(X[i, 2], Y[, 1])
+  shiftTbl[i, ] <- Y[Shiftrownum, 3:length(Y[1, ])]
+  Xtemp[i, (Xcolnum + 1):ADDnum] <- Y[Shiftrownum, 3:length(Y[1, ])]
+}
+headerX <- colnames(Xtemp)
+headerY <- colnames(Y)
+shfHdr <- headerY[3:length(headerY)]
+combTbl <- X[, 1:3]
+lmTbl <- select(Xtemp,  contains("Z_lm"))  #X[,(4:Xcolnum-2)]
+shiftTbl <- select(Xtemp,  contains("V"))
+clustTbl <- select(Xtemp, contains("cluster."))
+
+# Give the new column names the same names as in the shift file
+Xcols <- colnames(X)
+Ycols <- colnames(Y)[3:length(Y[1, ])]
+newCols <- c(Xcols[1:Xcolnum], Ycols)
+
+# Reorder columns for generating heatmaps
+combI <- combTbl  #Starting Template orf, Genename columns
+headersRemc <- newCols  #colnames(X)
+newHeaders <- newCols[1:3]
+lmHdr <- colnames(lmTbl)   #newCols[4:(length(Xcols)-2)]
+clstHdr <- colnames(clustTbl)  #select(newCols, contains('cluster.'))  #newCols[3+length(lmHdr):2]
+
+intLvHdr <- vector()
+#Reorder columns to produce an interleaved set of Z_lm and Shift data for all the cpps.
+for (i in 1:(length(shiftTbl[1, ]))) {
+  combI <- cbind.data.frame(combI, shiftTbl[i])
+  combI <- cbind.data.frame(combI, lmTbl[i])
+  intLvHdrx <- c(shfHdr[i], lmHdr[i])
+  intLvHdr <- c(intLvHdr, intLvHdrx)
+}
+
+combIHdr <- c(colnames(combTbl), intLvHdr, clstHdr)
+combI <- cbind.data.frame(combI, clustTbl)
+colnames(combI) <- combIHdr
+write.csv(combI, file = output, row.names = FALSE)
--- a/qhtcp-workflow/apps/r/calculate_interaction_zscores.R
+++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores.R
@@ -0,0 +1,587 @@
+suppressMessages({
+  library(ggplot2)
+  library(plotly)
+  library(htmlwidgets)
+  library(dplyr)
+  library(ggthemes)
+  library(data.table)
+})
+
+options(warn = 2, max.print = 1000)
+
+# Constants for configuration
+PLOT_WIDTH <- 14
+PLOT_HEIGHT <- 9
+BASE_SIZE <- 14
+
+parse_arguments <- function() {
+  args <- if (interactive()) {
+    c(
+      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240116_jhartman2_DoxoHLD",
+      3,
+      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/apps/r/SGD_features.tab",
+      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/easy/20240116_jhartman2_DoxoHLD/results_std.txt",
+      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp1",
+      "Experiment 1: Doxo versus HLD",
+      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp2",
+      "Experiment 2: HLD versus Doxo"
+    )
+  } else {
+    commandArgs(trailingOnly = TRUE)
+  }
+  
+  paths <- normalizePath(file.path(args[seq(5, length(args), by = 2)]), mustWork = FALSE)
+  names <- args[seq(6, length(args), by = 2)]
+  experiments <- setNames(paths, names)
+  
+  list(
+    out_dir = normalizePath(file.path(args[1]), mustWork = FALSE),
+    sd = as.numeric(args[2]),
+    sgd_gene_list = normalizePath(file.path(args[3]), mustWork = FALSE),
+    easy_results_file = normalizePath(file.path(args[4]), mustWork = FALSE),
+    experiments = experiments
+  )
+}
+
+args <- parse_arguments()
+
+dir.create(file.path(args$out_dir, "zscores"), showWarnings = FALSE)
+dir.create(file.path(args$out_dir, "zscores", "qc"), showWarnings = FALSE)
+
+# Define themes and scales
+theme_publication <- function(base_size = BASE_SIZE, base_family = "sans", legend_position = "bottom") {
+  theme_foundation(base_size = base_size, base_family = base_family) +
+    theme(
+      plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
+      text = element_text(),
+      panel.background = element_rect(colour = NA),
+      plot.background = element_rect(colour = NA),
+      panel.border = element_rect(colour = NA),
+      axis.title = element_text(face = "bold", size = rel(1)),
+      axis.title.y = element_text(angle = 90, vjust = 2),
+      axis.title.x = element_text(vjust = -0.2),
+      axis.line = element_line(colour = "black"),
+      panel.grid.major = element_line(colour = "#f0f0f0"),
+      panel.grid.minor = element_blank(),
+      legend.key = element_rect(colour = NA),
+      legend.position = legend_position,
+      legend.direction = ifelse(legend_position == "right", "vertical", "horizontal"),
+      plot.margin = unit(c(10, 5, 5, 5), "mm"),
+      strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
+      strip.text = element_text(face = "bold")
+    )
+}
+
+scale_fill_publication <- function(...) {
+  discrete_scale("fill", "Publication", manual_pal(values = c(
+    "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
+    "#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
+  )), ...)
+}
+
+scale_colour_publication <- function(...) {
+  discrete_scale("colour", "Publication", manual_pal(values = c(
+    "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
+    "#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
+  )), ...)
+}
+
+# Load SGD gene list
+sgd_genes <- function(sgd_gene_list) {
+  read.delim(file = sgd_gene_list, quote = "", header = FALSE,
+             colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11))) %>%
+    dplyr::rename(ORF = V4, GeneName = V5)
+}
+
+genes <- sgd_genes(args$sgd_gene_list)
+
+# Define the adjust_values function
+adjust_values <- function(x, delta_bg, tolerance) {
+  valid_x <- x[x != 0 & !is.na(x)]
+  max_x <- ifelse(length(valid_x) == 0, NA_real_, max(valid_x, na.rm = TRUE))
+  
+  x_adjusted <- ifelse(x == 0 & !is.na(x), max_x, x)
+  
+  # Adjust only if delta_bg is valid and above tolerance
+  x_adjusted <- ifelse(!is.na(delta_bg) & delta_bg >= tolerance, NA_real_, x_adjusted)
+  
+  return(x_adjusted)
+}
+
+
+
+load_and_preprocess_data <- function(easy_results_file, genes) {
+  df <-
+    read.delim(easy_results_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE)
+
+  # Apply further transformations
+  df <- df %>%
+    filter(
+      !is.na(ORF) & ORF != "",
+      !ORF %in% c("Blank"),
+      !is.na(Gene) & !Gene %in% c("BLANK", "Blank", "blank"),
+      Drug != "BMH21",
+      !is.na(Scan),
+      !is.na(l), !is.na(K),
+      !is.na(r), !is.na(AUC96)
+    ) %>%
+    mutate(
+      Col = suppressWarnings(as.numeric(Col)),
+      Row = suppressWarnings(as.numeric(Row)),
+      L = suppressWarnings(as.numeric(l)),
+      K = suppressWarnings(as.numeric(K)),
+      r = suppressWarnings(as.numeric(r)),
+      Scan = suppressWarnings(as.numeric(Scan)),
+      AUC = suppressWarnings(as.numeric(AUC96)),
+      LstBackgrd = suppressWarnings(as.numeric(LstBackgrd)),
+      X1stBackgrd = suppressWarnings(as.numeric(X1stBackgrd)),
+      delta_bg = ifelse(is.na(LstBackgrd) | is.na(X1stBackgrd), NA_real_, LstBackgrd - X1stBackgrd),
+      delta_bg_tolerance = mean(delta_bg, na.rm = TRUE) + 3 * sd(delta_bg, na.rm = TRUE),
+      OrfRep = ifelse(ORF == "YDL227C", "YDL227C", OrfRep),
+      OrfRepUnique = paste(OrfRep, Gene, Num., sep = "_"),
+      conc_num = suppressWarnings(as.numeric(gsub("[^0-9\\.]", "", Conc))),
+      DB = as.integer(delta_bg >= delta_bg_tolerance),
+      SM = 0,
+      NG = as.integer(l == 0 & !is.na(l)),
+      L_adjusted = adjust_values(L, delta_bg, delta_bg_tolerance),
+      r_adjusted = adjust_values(r, delta_bg, delta_bg_tolerance),
+      AUC_adjusted = adjust_values(AUC, delta_bg, delta_bg_tolerance),
+      K_adjusted = adjust_values(K, delta_bg, delta_bg_tolerance),
+      max_L_adjusted = ifelse(all(is.na(L_adjusted)), NA_real_, max(L_adjusted, na.rm = TRUE)),
+      SM = ifelse(!is.na(max_L_adjusted) & L_adjusted == max_L_adjusted, 1, SM)
+    )
+
+  # Efficiently handle the assignment of Gene names
+  gene_map <- setNames(genes$GeneName, genes$ORF)
+  df$Gene <- ifelse(df$OrfRep != "YDL227C", gene_map[df$ORF], df$Gene)
+  df$Gene <- ifelse(df$Gene == "" | df$Gene == "OCT1", df$OrfRep, df$Gene)
+
+  return(df)
+}
+
+
+
+
+generate_plot <- function(df, var, plot_type = "scatter", out_dir_qc, suffix = "") {
+
+  # Filter out non-finite values before plotting
+  df <- df %>%
+    filter(is.finite(!!sym(var)))
+
+  plot_func <- switch(plot_type,
+    scatter = geom_point(shape = 3, size = 0.2, position = "jitter"),
+    boxplot = geom_boxplot())
+
+  p <- ggplot(df, aes(Scan, !!sym(var), color = as.factor(conc_num))) +
+    plot_func +
+    stat_summary(fun = mean, geom = "point", size = 0.6) +
+    stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), geom = "errorbar") +
+    ggtitle(paste("Plate analysis by Drug Conc for", var, "before quality control")) +
+    theme_publication()
+
+  pdf_path <- file.path(out_dir_qc, paste0("plate_analysis_", var, suffix, ".pdf"))
+  pdf(pdf_path, width = PLOT_WIDTH, height = PLOT_HEIGHT)
+  print(p)
+  dev.off()
+  
+  html_path <- sub(".pdf$", ".html", pdf_path)
+  pgg <- suppressWarnings(ggplotly(p) %>%
+    layout(legend = list(orientation = "h")))
+  saveWidget(pgg, html_path, selfcontained = TRUE)
+}
+
+generate_and_publish_qc <- function(df, out_dir_qc) {
+  variables <- c("L", "K", "r", "AUC", "delta_bg")
+  
+  # Access delta_bg_tolerance from the dataframe
+  delta_bg_tolerance <- df$delta_bg_tolerance[1]
+  
+  for (var in variables) {
+    if (var %in% colnames(df)) {
+      generate_plot(df, var, "scatter", out_dir_qc)
+    }
+  }
+
+  df_post_qc <- df %>%
+    mutate(across(all_of(variables), ~ ifelse(delta_bg >= delta_bg_tolerance, NA, .)))
+
+  for (var in variables) {
+    if (var %in% colnames(df_post_qc)) {
+      generate_plot(df_post_qc, var, "scatter", out_dir_qc, suffix = "_after_qc")
+    }
+  }
+
+  delta_bg_above_tolerance <- df[df$delta_bg >= delta_bg_tolerance, ]
+  for (var in variables) {
+    if (var %in% colnames(delta_bg_above_tolerance)) {
+      generate_plot(delta_bg_above_tolerance, var, "scatter", out_dir_qc, suffix = "_above_tolerance")
+    }
+  }
+
+  df_finite <- df %>% filter(is.finite(delta_bg))
+  delta_bg_frequency_plot <- ggplot(df_finite, aes(delta_bg, color = as.factor(conc_num))) +
+    geom_density() +
+    ggtitle("Density plot for Delta Background by Conc All Data") +
+    theme_publication(legend_position = "right")
+  
+  delta_bg_bar_plot <- ggplot(df_finite, aes(delta_bg, color = as.factor(conc_num))) +
+    geom_bar() +
+    ggtitle("Bar plot for Delta Background by Conc All Data") +
+    theme_publication(legend_position = "right")
+
+  pdf(file = file.path(out_dir_qc, "frequency_delta_background.pdf"), width = 12, height = 8)
+  print(delta_bg_frequency_plot)
+  print(delta_bg_bar_plot)
+  dev.off()
+  
+  saveWidget(ggplotly(delta_bg_frequency_plot),
+    file = file.path(out_dir_qc, "frequency_delta_background_density.html"), selfcontained = TRUE)
+  saveWidget(ggplotly(delta_bg_bar_plot),
+    file = file.path(out_dir_qc, "frequency_delta_background_bar.html"), selfcontained = TRUE)
+
+  df_no_zero <- df %>% filter(L > 0 & is.finite(L))
+
+  scatter_plots <- list()
+  box_plots <- list()
+  for (var in variables) {
+    if (var %in% colnames(df_no_zero)) {
+      scatter_plots[[var]] <- ggplot(df_no_zero, aes(Scan, .data[[var]], color = as.factor(conc_num))) +
+        geom_point(shape = 3, size = 0.2) +
+        stat_summary(fun = mean, geom = "point", size = 0.6) +
+        stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), geom = "errorbar") +
+        ggtitle(paste("Plate analysis by Drug Conc for", var, "after quality control")) +
+        theme_publication()
+
+      box_plots[[var]] <- ggplot(df_no_zero, aes(as.factor(Scan), .data[[var]], color = as.factor(conc_num))) +
+        geom_boxplot() +
+        ggtitle(paste("Plate analysis by Drug Conc for", var, "after quality control")) +
+        theme_publication()
+    }
+  }
+
+  pdf(file = file.path(out_dir_qc, "plate_analysis_no_zeros.pdf"), width = 14, height = 9)
+  lapply(scatter_plots, print)
+  dev.off()
+  
+  pdf(file = file.path(out_dir_qc, "plate_analysis_no_zeros_boxplots.pdf"), width = 18, height = 9)
+  lapply(box_plots, print)
+  dev.off()
+
+  for (var in names(scatter_plots)) {
+    html_path <- file.path(out_dir_qc, paste0("plate_analysis_no_zeros_", var, ".html"))
+
+    pgg <- suppressWarnings(ggplotly(scatter_plots[[var]]) %>%
+      layout(legend = list(orientation = "h")))
+    saveWidget(pgg, html_path, selfcontained = TRUE)
+  }
+
+  for (var in names(box_plots)) {
+    html_path <- file.path(out_dir_qc, paste0("plate_analysis_no_zeros_boxplots_", var, ".html"))
+
+    pgg <- suppressWarnings(ggplotly(box_plots[[var]]) %>%
+      layout(legend = list(orientation = "h")))
+    saveWidget(pgg, html_path, selfcontained = TRUE)
+  }
+}
+
+compute_experiment_summary_stats <- function(df, out_dir, background_strain = NULL) {
+  if (!is.null(background_strain)) {
+    df <- df %>% filter(OrfRep == background_strain)
+  }
+  
+  summary_stats <- df %>%
+    group_by(OrfRep, conc_num) %>%
+    summarize(
+      N = n(),
+      mean_L = mean(L, na.rm = TRUE),
+      median_L = median(L, na.rm = TRUE),
+      max_L = max(L, na.rm = TRUE),
+      min_L = min(L, na.rm = TRUE),
+      sd_L = sd(L, na.rm = TRUE),
+      se_L = sd_L / sqrt(N),
+      mean_K = mean(K, na.rm = TRUE),
+      median_K = median(K, na.rm = TRUE),
+      max_K = max(K, na.rm = TRUE),
+      min_K = min(K, na.rm = TRUE),
+      sd_K = sd(K, na.rm = TRUE),
+      se_K = sd_K / sqrt(N),
+      mean_r = mean(r, na.rm = TRUE),
+      median_r = median(r, na.rm = TRUE),
+      max_r = max(r, na.rm = TRUE),
+      min_r = min(r, na.rm = TRUE),
+      sd_r = sd(r, na.rm = TRUE),
+      se_r = sd_r / sqrt(N),
+      mean_AUC = mean(AUC, na.rm = TRUE),
+      median_AUC = median(AUC, na.rm = TRUE),
+      max_AUC = max(AUC, na.rm = TRUE),
+      min_AUC = min(AUC, na.rm = TRUE),
+      sd_AUC = sd(AUC, na.rm = TRUE),
+      se_AUC = sd_AUC / sqrt(N)
+    )
+
+  if (is.null(background_strain)) {
+    fwrite(summary_stats, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
+  } else {
+    fwrite(summary_stats, file.path(out_dir, "summary_stats_background_strains.csv"), row.names = FALSE)
+  }
+}
+
+
+filter_and_calculate_sd_k_stats <- function(df, out_dir_qc) {
+  df_cleaned <- df %>%
+    filter(is.finite(L_adjusted) & is.finite(K_adjusted))
+
+  df_within_2SD_K <- df_cleaned %>%
+    filter(abs(K_adjusted - mean(K_adjusted, na.rm = TRUE)) <= 2 * sd(K_adjusted, na.rm = TRUE)) %>%
+    group_by(conc_num) %>%
+    summarize(
+      N = n(),
+      mean_L = mean(L_adjusted, na.rm = TRUE),
+      max_L = ifelse(all(is.na(L_adjusted)), NA_real_, max(L_adjusted, na.rm = TRUE)),
+      sd_L = sd(L_adjusted, na.rm = TRUE)
+    )
+  fwrite(df_within_2SD_K, file.path(out_dir_qc, "max_observed_L_vals_for_spots_within_2SD_K.csv"))
+
+  df_outside_2SD_K <- df_cleaned %>%
+    filter(abs(K_adjusted - mean(K_adjusted, na.rm = TRUE)) > 2 * sd(K_adjusted, na.rm = TRUE)) %>%
+    group_by(conc_num) %>%
+    summarize(
+      N = n(),
+      mean_L = mean(L_adjusted, na.rm = TRUE),
+      max_L = ifelse(all(is.na(L_adjusted)), NA_real_, max(L_adjusted, na.rm = TRUE)),
+      sd_L = sd(L_adjusted, na.rm = TRUE)
+    )
+  fwrite(df_outside_2SD_K, file.path(out_dir_qc, "max_observed_L_vals_for_spots_outside_2SD_K.csv"))
+
+  p_raw_before <- ggplot(df_cleaned,
+    aes(x = L_adjusted, y = K_adjusted, color = as.factor(conc_num))) +
+    geom_point() +
+    ggtitle("Raw L vs K before QC") +
+    theme_publication(legend_position = "right")
+  
+  pdf(file.path(out_dir_qc, "raw_L_vs_K_before_QC.pdf"))
+  print(p_raw_before)
+  dev.off()
+  
+  saveWidget(ggplotly(p_raw_before), file.path(out_dir_qc, "raw_L_vs_K_before_QC.html"))
+
+  p_above_threshold <- ggplot(df_cleaned %>% filter(delta_bg > delta_bg_tolerance),
+    aes(x = L_adjusted, y = K_adjusted, color = as.factor(conc_num))) +
+    geom_point() +
+    ggtitle("Raw L vs K for strains above delta background threshold") +
+    theme_publication(legend_position = "right")
+  
+  pdf(file.path(out_dir_qc, "raw_L_vs_K_for_strains_above_delta_bg_threshold.pdf"))
+  print(p_above_threshold)
+  dev.off()
+  
+  saveWidget(ggplotly(p_above_threshold), file.path(out_dir_qc, "raw_L_vs_K_for_strains_above_delta_bg_threshold.html"))
+}
+
+compute_rf_interaction_scores <- function(df_rf, output_dir) {
+  lm_sd_L <- sd(df_rf$lm_Score_L, na.rm = TRUE)
+  lm_sd_K <- sd(df_rf$lm_Score_K, na.rm = TRUE)
+  lm_sd_r <- sd(df_rf$lm_Score_r, na.rm = TRUE)
+  lm_sd_AUC <- sd(df_rf$lm_Score_AUC, na.rm = TRUE)
+  
+  lm_mean_L <- mean(df_rf$lm_Score_L, na.rm = TRUE)
+  lm_mean_K <- mean(df_rf$lm_Score_K, na.rm = TRUE)
+  lm_mean_r <- mean(df_rf$lm_Score_r, na.rm = TRUE)
+  lm_mean_AUC <- mean(df_rf$lm_Score_AUC, na.rm = TRUE)
+  
+  df_rf <- df_rf %>%
+    mutate(
+      Z_lm_L = (lm_Score_L - lm_mean_L) / lm_sd_L,
+      Z_lm_K = (lm_Score_K - lm_mean_K) / lm_sd_K,
+      Z_lm_r = (lm_Score_r - lm_mean_r) / lm_sd_r,
+      Z_lm_AUC = (lm_Score_AUC - lm_mean_AUC) / lm_sd_AUC
+    )
+  
+  df_rf <- df_rf %>%
+      arrange(desc(Z_lm_L), desc(NG))
+  
+  fwrite(df_rf, file.path(output_dir, "rf_zscores_interaction.csv"), row.names = FALSE)
+  
+  return(df_rf)
+}
+
+create_rank_plots <- function(interaction_scores_ranks, out_dir) {
+  rank_vars <- c("l_rank", "k_rank", "r_rank", "auc_rank")
+  
+  lapply(rank_vars, function(rank_var) {
+    p <- ggplot(interaction_scores_ranks, aes(x = !!sym(rank_var))) +
+      geom_bar() +
+      ggtitle(paste("Rank Distribution for", rank_var)) +
+      theme_publication()
+    
+    pdf_path <- file.path(out_dir, paste0("rank_distribution_", rank_var, ".pdf"))
+    pdf(pdf_path, width = PLOT_WIDTH, height = PLOT_HEIGHT)
+    print(p)
+    dev.off()
+    
+    html_path <- sub(".pdf$", ".html", pdf_path)
+    pgg <- suppressWarnings(ggplotly(p) %>%
+      layout(legend = list(orientation = "h")))
+    saveWidget(pgg, html_path, selfcontained = TRUE)
+  })
+}
+
+create_correlation_plot <- function(interaction_scores, out_dir) {
+  interaction_scores <- interaction_scores %>%
+    filter_all(all_vars(is.finite(.)))
+  
+  pairs <- list(
+    c("Raw_Shift_L", "Raw_Shift_K"),
+    c("Raw_Shift_L", "Raw_Shift_r"),
+    c("Raw_Shift_L", "Raw_Shift_AUC"),
+    c("Raw_Shift_K", "Raw_Shift_r"),
+    c("Raw_Shift_K", "Raw_Shift_AUC"),
+    c("Raw_Shift_r", "Raw_Shift_AUC")
+  )
+  
+  lapply(pairs, function(vars) {
+    p <- ggplot(interaction_scores, aes(x = !!sym(vars[1]), y = !!sym(vars[2]))) +
+      geom_point() +
+      geom_smooth(method = "lm", se = FALSE) +
+      ggtitle(paste("Correlation between", vars[1], "and", vars[2])) +
+      theme_publication()
+    
+    pdf_path <- file.path(out_dir, paste0("correlation_", vars[1], "_", vars[2], ".pdf"))
+    pdf(pdf_path, width = PLOT_WIDTH, height = PLOT_HEIGHT)
+    print(p)
+    dev.off()
+    
+    html_path <- sub(".pdf$", ".html", pdf_path)
+    pgg <- suppressWarnings(ggplotly(p, tooltip = c(vars[1], vars[2])) %>%
+      layout(legend = list(orientation = "h")))
+    saveWidget(pgg, html_path, selfcontained = TRUE)
+  })
+}
+
+process_experiment <- function(exp_name, exp_dir, genes, output_dir) {
+  out_dir <- file.path(exp_dir, "zscores")
+  out_dir_qc <- file.path(out_dir, "qc")
+  dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)
+  dir.create(out_dir_qc, showWarnings = FALSE)
+
+  data <- load_and_preprocess_data(args$easy_results_file, genes)
+
+  # Generate and publish QC plots
+  generate_and_publish_qc(data, out_dir_qc)
+
+  # Calculate and save summary statistics
+  compute_experiment_summary_stats(data, out_dir)
+  compute_experiment_summary_stats(data, out_dir, background_strain = "YDL227C")
+
+  InteractionScores_RF <- data %>%
+    select(OrfRepUnique) %>%
+    distinct() %>%
+    mutate(
+      Gene = NA,
+      Raw_Shift_L = NA, Z_Shift_L = NA, lm_Score_L = NA, Z_lm_L = NA,
+      R_Squared_L = NA, Sum_Z_Score_L = NA, Avg_Zscore_L = NA,
+      Raw_Shift_K = NA, Z_Shift_K = NA, lm_Score_K = NA, Z_lm_K = NA,
+      R_Squared_K = NA, Sum_Z_Score_K = NA, Avg_Zscore_K = NA,
+      Raw_Shift_r = NA, Z_Shift_r = NA, lm_Score_r = NA, Z_lm_r = NA,
+      R_Squared_r = NA, Sum_Z_Score_r = NA, Avg_Zscore_r = NA,
+      Raw_Shift_AUC = NA, Z_Shift_AUC = NA, lm_Score_AUC = NA, Z_lm_AUC = NA,
+      R_Squared_AUC = NA, Sum_Z_Score_AUC = NA, Avg_Zscore_AUC = NA,
+      NG = NA, SM = NA
+    )
+
+  for (i in seq_along(unique(InteractionScores_RF$OrfRepUnique))) {
+    Gene_Sel <- unique(InteractionScores_RF$OrfRepUnique)[i]
+    X_Gene_Sel <- data[data$OrfRepUnique == Gene_Sel,]
+    
+    X_stats_interaction <- X_Gene_Sel %>%
+      group_by(OrfRepUnique, Gene) %>%
+      summarize(
+        Raw_Shift_L = mean(L_adjusted, na.rm = TRUE),
+        Raw_Shift_K = mean(K_adjusted, na.rm = TRUE),
+        Raw_Shift_r = mean(r_adjusted, na.rm = TRUE),
+        Raw_Shift_AUC = mean(AUC_adjusted, na.rm = TRUE),
+        Z_Shift_L = mean(scale(L_adjusted, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
+        Z_Shift_K = mean(scale(K_adjusted, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
+        Z_Shift_r = mean(scale(r_adjusted, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
+        Z_Shift_AUC = mean(scale(AUC_adjusted, center = TRUE, scale = TRUE)[, 1], na.rm = TRUE),
+        lm_Score_L = coef(lm(L_adjusted ~ delta_bg, data = X_Gene_Sel))[2],
+        R_Squared_L = summary(lm(L_adjusted ~ delta_bg, data = X_Gene_Sel))$r.squared,
+        lm_Score_K = coef(lm(K_adjusted ~ delta_bg, data = X_Gene_Sel))[2],
+        R_Squared_K = summary(lm(K_adjusted ~ delta_bg, data = X_Gene_Sel))$r.squared,
+        lm_Score_r = coef(lm(r_adjusted ~ delta_bg, data = X_Gene_Sel))[2],
+        R_Squared_r = summary(lm(r_adjusted ~ delta_bg, data = X_Gene_Sel))$r.squared,
+        lm_Score_AUC = coef(lm(AUC_adjusted ~ delta_bg, data = X_Gene_Sel))[2],
+        R_Squared_AUC = summary(lm(AUC_adjusted ~ delta_bg, data = X_Gene_Sel))$r.squared,
+        NG = sum(NG, na.rm = TRUE),
+        DB = sum(DB, na.rm = TRUE),
+        SM = sum(SM, na.rm = TRUE)
+      )
+    
+    InteractionScores_RF <- InteractionScores_RF %>%
+      mutate(
+        Gene = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$Gene[1], Gene),
+        Raw_Shift_L = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$Raw_Shift_L[1], Raw_Shift_L),
+        Z_Shift_L = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$Z_Shift_L[1], Z_Shift_L),
+        lm_Score_L = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$lm_Score_L[1], lm_Score_L),
+        R_Squared_L = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$R_Squared_L[1], R_Squared_L),
+        Sum_Z_Score_L = ifelse(OrfRepUnique == Gene_Sel, sum(X_stats_interaction$Z_Shift_L, na.rm = TRUE), Sum_Z_Score_L),
+        Avg_Zscore_L = ifelse(OrfRepUnique == Gene_Sel, mean(X_stats_interaction$Z_Shift_L, na.rm = TRUE), Avg_Zscore_L),
+        Raw_Shift_K = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$Raw_Shift_K[1], Raw_Shift_K),
+        Z_Shift_K = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$Z_Shift_K[1], Z_Shift_K),
+        lm_Score_K = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$lm_Score_K[1], lm_Score_K),
+        R_Squared_K = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$R_Squared_K[1], R_Squared_K),
+        Sum_Z_Score_K = ifelse(OrfRepUnique == Gene_Sel, sum(X_stats_interaction$Z_Shift_K, na.rm = TRUE), Sum_Z_Score_K),
+        Avg_Zscore_K = ifelse(OrfRepUnique == Gene_Sel, mean(X_stats_interaction$Z_Shift_K, na.rm = TRUE), Avg_Zscore_K),
+        Raw_Shift_r = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$Raw_Shift_r[1], Raw_Shift_r),
+        Z_Shift_r = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$Z_Shift_r[1], Z_Shift_r),
+        lm_Score_r = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$lm_Score_r[1], lm_Score_r),
+        R_Squared_r = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$R_Squared_r[1], R_Squared_r),
+        Sum_Z_Score_r = ifelse(OrfRepUnique == Gene_Sel, sum(X_stats_interaction$Z_Shift_r, na.rm = TRUE), Sum_Z_Score_r),
+        Avg_Zscore_r = ifelse(OrfRepUnique == Gene_Sel, mean(X_stats_interaction$Z_Shift_r, na.rm = TRUE), Avg_Zscore_r),
+        Raw_Shift_AUC = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$Raw_Shift_AUC[1], Raw_Shift_AUC),
+        Z_Shift_AUC = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$Z_Shift_AUC[1], Z_Shift_AUC),
+        lm_Score_AUC = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$lm_Score_AUC[1], lm_Score_AUC),
+        R_Squared_AUC = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$R_Squared_AUC[1], R_Squared_AUC),
+        Sum_Z_Score_AUC = ifelse(OrfRepUnique == Gene_Sel, sum(X_stats_interaction$Z_Shift_AUC, na.rm = TRUE), Sum_Z_Score_AUC),
+        Avg_Zscore_AUC = ifelse(OrfRepUnique == Gene_Sel, mean(X_stats_interaction$Z_Shift_AUC, na.rm = TRUE), Avg_Zscore_AUC),
+        NG = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$NG[1], NG),
+        DB = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$DB[1], DB),
+        SM = ifelse(OrfRepUnique == Gene_Sel, X_stats_interaction$SM[1], SM)
+      )
+  }
+
+  fwrite(InteractionScores_RF, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE)
+
+  generate_and_publish_qc(data, out_dir_qc)
+
+  filter_and_calculate_sd_k_stats(data, out_dir_qc)
+
+  variables <- c("L", "K", "r", "AUC", "delta_bg")
+  compute_experiment_summary_stats(data, variables, out_dir)
+  compute_experiment_summary_stats(data, variables, out_dir, background_strain = "YDL227C")
+
+  compute_rf_interaction_scores(InteractionScores_RF, out_dir)
+
+  publish_scores(InteractionScores_RF, out_dir)
+
+  create_rank_plots(InteractionScores_RF, out_dir)
+  create_correlation_plot(InteractionScores_RF, out_dir)
+
+  output_file <- file.path(out_dir, "zscores_interaction.csv")
+  fwrite(InteractionScores_RF, output_file, row.names = FALSE)
+
+  return(output_file)
+}
+
+processed_files <- lapply(names(args$experiments), function(exp_name) {
+  process_experiment(exp_name, args$experiments[[exp_name]], genes, args$out_dir)
+})
+
+if (length(processed_files) > 1) {
+  combined_data <- Reduce(function(x, y) {
+    merge(fread(x), fread(y), by = "OrfRepUnique", all = TRUE)
+  }, processed_files)
+  
+  combined_output_file <- file.path(args$out_dir, "zscores", "zscores_interaction_combined.csv")
+  fwrite(combined_data, combined_output_file, row.names = FALSE)
+}
--- a/qhtcp-workflow/apps/r/calculate_interaction_zscores2.R
+++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores2.R
--- a/qhtcp-workflow/apps/r/calculate_interaction_zscores3.R
+++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores3.R
--- a/qhtcp-workflow/apps/r/calculate_interaction_zscores4.R
+++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores4.R
--- a/qhtcp-workflow/apps/r/calculate_interaction_zscores5.R
+++ b/qhtcp-workflow/apps/r/calculate_interaction_zscores5.R
@@ -0,0 +1,945 @@
+suppressMessages({
+  library(ggplot2)
+  library(plotly)
+  library(htmlwidgets)
+  library(dplyr)
+  library(ggthemes)
+  library(data.table)
+  library(unix)
+})
+
+options(warn = 2, max.print = 1000)
+options(width = 10000)
+
+# Set the memory limit to 30GB (30 * 1024 * 1024 * 1024 bytes)
+soft_limit <- 30 * 1024 * 1024 * 1024
+hard_limit <- 30 * 1024 * 1024 * 1024
+rlimit_as(soft_limit, hard_limit)
+
+# Constants for configuration
+plot_width <- 14
+plot_height <- 9
+base_size <- 14
+
+parse_arguments <- function() {
+  args <- if (interactive()) {
+    c(
+      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240116_jhartman2_DoxoHLD",
+      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/apps/r/SGD_features.tab",
+      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/easy/20240116_jhartman2_DoxoHLD/results_std.txt",
+      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp1",
+      "Experiment 1: Doxo versus HLD",
+      3,
+      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp2",
+      "Experiment 2: HLD versus Doxo",
+      3
+    )
+  } else {
+    commandArgs(trailingOnly = TRUE)
+  }
+  
+  # Extract paths, names, and standard deviations
+  paths <- args[seq(4, length(args), by = 3)]
+  names <- args[seq(5, length(args), by = 3)]
+  sds <- as.numeric(args[seq(6, length(args), by = 3)])
+  
+  # Normalize paths
+  normalized_paths <- normalizePath(paths, mustWork = FALSE)
+  
+  # Create named list of experiments
+  experiments <- list()
+  for (i in seq_along(paths)) {
+    experiments[[names[i]]] <- list(
+      path = normalized_paths[i],
+      sd = sds[i]
+    )
+  }
+  
+  list(
+    out_dir = normalizePath(args[1], mustWork = FALSE),
+    sgd_gene_list = normalizePath(args[2], mustWork = FALSE),
+    easy_results_file = normalizePath(args[3], mustWork = FALSE),
+    experiments = experiments
+  )
+}
+
+args <- parse_arguments()
+
+# Should we keep output in exp dirs or combine in the study output dir?
+# dir.create(file.path(args$out_dir, "zscores"), showWarnings = FALSE)
+# dir.create(file.path(args$out_dir, "zscores", "qc"), showWarnings = FALSE)
+
+# Define themes and scales
+theme_publication <- function(base_size = 14, base_family = "sans", legend_position = "bottom") {
+  theme_foundation <- ggplot2::theme_grey(base_size = base_size, base_family = base_family)
+  
+  theme_foundation %+replace%
+    theme(
+      plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
+      text = element_text(),
+      panel.background = element_rect(colour = NA),
+      plot.background = element_rect(colour = NA),
+      panel.border = element_rect(colour = NA),
+      axis.title = element_text(face = "bold", size = rel(1)),
+      axis.title.y = element_text(angle = 90, vjust = 2),
+      axis.title.x = element_text(vjust = -0.2),
+      axis.line = element_line(colour = "black"),
+      panel.grid.major = element_line(colour = "#f0f0f0"),
+      panel.grid.minor = element_blank(),
+      legend.key = element_rect(colour = NA),
+      legend.position = legend_position,
+      legend.direction = ifelse(legend_position == "right", "vertical", "horizontal"),
+      plot.margin = unit(c(10, 5, 5, 5), "mm"),
+      strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
+      strip.text = element_text(face = "bold")
+    )
+}
+
+scale_fill_publication <- function(...) {
+  discrete_scale("fill", "Publication", manual_pal(values = c(
+    "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
+    "#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
+  )), ...)
+}
+
+scale_colour_publication <- function(...) {
+  discrete_scale("colour", "Publication", manual_pal(values = c(
+    "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
+    "#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
+  )), ...)
+}
+
+# Load the initial dataframe from the easy_results_file
+load_and_process_data <- function(easy_results_file, sd = 3) {
+  df <- read.delim(easy_results_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE)
+  
+  df <- df %>%
+    filter(!(.[[1]] %in% c("", "Scan"))) %>%
+    filter(!is.na(ORF) & ORF != "" & !Gene %in% c("BLANK", "Blank", "blank") & Drug != "BMH21") %>%
+    # Rename columns
+    rename(L = l, num = Num., AUC = AUC96, scan = Scan, last_bg = LstBackgrd, first_bg = X1stBackgrd) %>%
+    mutate(
+      across(c(Col, Row, num, L, K, r, scan, AUC, last_bg, first_bg), as.numeric),
+      delta_bg = last_bg - first_bg,
+      delta_bg_tolerance = mean(delta_bg, na.rm = TRUE) + (sd * sd(delta_bg, na.rm = TRUE)),
+      NG = if_else(L == 0 & !is.na(L), 1, 0),
+      DB = if_else(delta_bg >= delta_bg_tolerance, 1, 0),
+      SM = 0,
+      OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep), # should these be hardcoded?
+      conc_num = as.numeric(gsub("[^0-9\\.]", "", Conc)),
+      conc_num_factor = as.numeric(as.factor(conc_num)) - 1
+    )
+  
+  return(df)
+}
+
+# Update Gene names using the SGD gene list
+update_gene_names <- function(df, sgd_gene_list) {
+  # Load SGD gene list
+  genes <- read.delim(file = sgd_gene_list,
+    quote = "", header = FALSE,
+    colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11)))
+  
+  # Create a named vector for mapping ORF to GeneName
+  gene_map <- setNames(genes$V5, genes$V4)
+  # Vectorized match to find the GeneName from gene_map
+  mapped_genes <- gene_map[df$ORF]
+  # Replace NAs in mapped_genes with original Gene names (preserves existing Gene names if ORF is not found)
+  updated_genes <- ifelse(is.na(mapped_genes) | df$OrfRep == "YDL227C", df$Gene, mapped_genes)
+  # Ensure Gene is not left blank or incorrectly updated to "OCT1"
+  df <- df %>%
+    mutate(Gene = ifelse(updated_genes == "" | updated_genes == "OCT1", OrfRep, updated_genes))
+  
+  return(df)
+}
+
+# Calculate summary statistics for all variables
+calculate_summary_stats <- function(df, variables, group_vars = c("conc_num", "conc_num_factor")) {
+  df <- df %>%
+    mutate(across(all_of(variables), ~ ifelse(. == 0, NA, .)))
+
+  summary_stats <- df %>%
+    group_by(across(all_of(group_vars))) %>%
+    summarise(
+      N = sum(!is.na(L)),
+      across(all_of(variables), list(
+        mean = ~mean(., na.rm = TRUE),
+        median = ~median(., na.rm = TRUE),
+        max = ~max(., na.rm = TRUE),
+        min = ~min(., na.rm = TRUE),
+        sd = ~sd(., na.rm = TRUE),
+        se = ~sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)) - 1)
+      ), .names = "{.fn}_{.col}")
+    )
+
+  df_cleaned <- df %>%
+    select(-any_of(names(summary_stats)))
+
+  df_with_stats <- left_join(df_cleaned, summary_stats, by = group_vars)
+
+  return(list(summary_stats = summary_stats, df_with_stats = df_with_stats))
+}
+
+
+calculate_interaction_scores <- function(df, max_conc, variables, group_vars = c("OrfRep", "Gene", "num")) {
+
+  # Calculate total concentration variables
+  total_conc_num <- length(unique(df$conc_num))
+  num_non_removed_concs <- total_conc_num - sum(df$DB, na.rm = TRUE) - 1
+
+  # Pull the background means and standard deviations from zero concentration
+  bg_means <- list(
+    L = df %>% filter(conc_num_factor == 0) %>% pull(mean_L) %>% first(),
+    K = df %>% filter(conc_num_factor == 0) %>% pull(mean_K) %>% first(),
+    r = df %>% filter(conc_num_factor == 0) %>% pull(mean_r) %>% first(),
+    AUC = df %>% filter(conc_num_factor == 0) %>% pull(mean_AUC) %>% first()
+  )
+
+  bg_sd <- list(
+    L = df %>% filter(conc_num_factor == 0) %>% pull(sd_L) %>% first(),
+    K = df %>% filter(conc_num_factor == 0) %>% pull(sd_K) %>% first(),
+    r = df %>% filter(conc_num_factor == 0) %>% pull(sd_r) %>% first(),
+    AUC = df %>% filter(conc_num_factor == 0) %>% pull(sd_AUC) %>% first()
+  )
+
+  interaction_scores <- df %>%
+    mutate(
+      WT_L = df$mean_L,
+      WT_K = df$mean_K,
+      WT_r = df$mean_r,
+      WT_AUC = df$mean_AUC,
+      WT_sd_L = df$sd_L,
+      WT_sd_K = df$sd_K,
+      WT_sd_r = df$sd_r,
+      WT_sd_AUC = df$sd_AUC
+    ) %>%
+    group_by(across(all_of(group_vars)), conc_num, conc_num_factor) %>%
+      mutate(
+        N = sum(!is.na(L)),
+        NG = sum(NG, na.rm = TRUE),
+        DB = sum(DB, na.rm = TRUE),
+        SM = sum(SM, na.rm = TRUE),
+        across(all_of(variables), list(
+          mean = ~mean(., na.rm = TRUE),
+          median = ~median(., na.rm = TRUE),
+          max = ~max(., na.rm = TRUE),
+          min = ~min(., na.rm = TRUE),
+          sd = ~sd(., na.rm = TRUE),
+          se = ~sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)) - 1)
+        ), .names = "{.fn}_{.col}")
+      ) %>%
+    ungroup()
+
+  interaction_scores <- interaction_scores %>%
+    group_by(across(all_of(group_vars))) %>%
+      mutate(
+        Raw_Shift_L = mean_L[[1]] - bg_means$L,
+        Raw_Shift_K = mean_K[[1]] - bg_means$K,
+        Raw_Shift_r = mean_r[[1]] - bg_means$r,
+        Raw_Shift_AUC = mean_AUC[[1]] - bg_means$AUC,
+        Z_Shift_L = Raw_Shift_L[[1]] / df$sd_L[[1]],
+        Z_Shift_K = Raw_Shift_K[[1]] / df$sd_K[[1]],
+        Z_Shift_r = Raw_Shift_r[[1]] / df$sd_r[[1]],
+        Z_Shift_AUC = Raw_Shift_AUC[[1]] / df$sd_AUC[[1]]
+      )
+
+  interaction_scores <- interaction_scores %>%
+    mutate(
+      Exp_L = WT_L + Raw_Shift_L,
+      Delta_L = mean_L - Exp_L,
+      Exp_K = WT_K + Raw_Shift_K,
+      Delta_K = mean_K - Exp_K,
+      Exp_r = WT_r + Raw_Shift_r,
+      Delta_r = mean_r - Exp_r,
+      Exp_AUC = WT_AUC + Raw_Shift_AUC,
+      Delta_AUC = mean_AUC - Exp_AUC
+    )
+
+  interaction_scores <- interaction_scores %>%
+    mutate(
+      Delta_L = if_else(NG == 1, mean_L - WT_L, Delta_L),
+      Delta_K = if_else(NG == 1, mean_K - WT_K, Delta_K),
+      Delta_r = if_else(NG == 1, mean_r - WT_r, Delta_r),
+      Delta_AUC = if_else(NG == 1, mean_AUC - WT_AUC, Delta_AUC),
+      Delta_L = if_else(SM == 1, mean_L - WT_L, Delta_L)
+    )
+
+  # Calculate linear models and interaction scores
+  interaction_scores <- interaction_scores %>%
+    mutate(
+      lm_L = lm(Delta_L ~ conc_num_factor),
+      lm_K = lm(Delta_K ~ conc_num_factor),
+      lm_r = lm(Delta_r ~ conc_num_factor),
+      lm_AUC = lm(Delta_AUC ~ conc_num_factor),
+      Zscore_L = Delta_L / WT_sd_L,
+      Zscore_K = Delta_K / WT_sd_K,
+      Zscore_r = Delta_r / WT_sd_r,
+      Zscore_AUC = Delta_AUC / WT_sd_AUC
+    )
+
+  interaction_scores <- interaction_scores %>%
+    mutate(
+      Sum_Zscore_L = sum(Zscore_L, na.rm = TRUE),
+      Sum_Zscore_K = sum(Zscore_K, na.rm = TRUE),
+      Sum_Zscore_r = sum(Zscore_r, na.rm = TRUE),
+      Sum_Zscore_AUC = sum(Zscore_AUC, na.rm = TRUE)
+    )
+
+  interaction_scores_all <- interaction_scores %>%
+    mutate(
+      Avg_Zscore_L = Sum_Zscore_L / num_non_removed_concs,
+      Avg_Zscore_K = Sum_Zscore_K / num_non_removed_concs,
+      Avg_Zscore_r = Sum_Zscore_r / (total_conc_num - 1),
+      Avg_Zscore_AUC = Sum_Zscore_AUC / (total_conc_num - 1),
+      lm_Score_L = max_conc * coef(lm_L)[2] + coef(lm_L)[1],
+      lm_Score_K = max_conc * coef(lm_K)[2] + coef(lm_K)[1],
+      lm_Score_r = max_conc * coef(lm_r)[2] + coef(lm_r)[1],
+      lm_Score_AUC = max_conc * coef(lm_AUC)[2] + coef(lm_AUC)[1],
+      r_squared_L = summary(lm_L)$r.squared,
+      r_squared_K = summary(lm_K)$r.squared,
+      r_squared_r = summary(lm_r)$r.squared,
+      r_squared_AUC = summary(lm_AUC)$r.squared
+    )
+
+  # Calculate Z_lm for each variable
+  interaction_scores_all <- interaction_scores_all %>%
+    mutate(
+      Z_lm_L = (lm_Score_L - mean(lm_Score_L, na.rm = TRUE)) / sd(lm_Score_L, na.rm = TRUE),
+      Z_lm_K = (lm_Score_K - mean(lm_Score_K, na.rm = TRUE)) / sd(lm_Score_K, na.rm = TRUE),
+      Z_lm_r = (lm_Score_r - mean(lm_Score_r, na.rm = TRUE)) / sd(lm_Score_r, na.rm = TRUE),
+      Z_lm_AUC = (lm_Score_AUC - mean(lm_Score_AUC, na.rm = TRUE)) / sd(lm_Score_AUC, na.rm = TRUE)
+    )
+
+  # Arrange results by Z_lm_L and NG
+  interaction_scores_all <- interaction_scores_all %>%
+    arrange(desc(lm_Score_L)) %>%
+    arrange(desc(NG)) %>%
+    ungroup()
+
+  return(list(zscores_calculations = interaction_scores_all, zscores_interactions = interaction_scores))
+}
+
+generate_and_save_plots <- function(output_dir, file_name, plot_configs, grid_layout = NULL) {
+  
+  `%||%` <- function(a, b) if (!is.null(a)) a else b
+  
+  # Generalized plot generation
+  plots <- lapply(plot_configs, function(config) {
+    
+    df <- config$df
+    plot <- ggplot(df, aes(x = !!sym(config$x_var), y = !!sym(config$y_var), color = as.factor(!!sym(config$color_var))))
+    
+    # Rank plots with SD annotations
+    if (config$plot_type == "rank") {
+      plot <- plot + geom_point(size = 0.1, shape = 3)
+      
+      # Adding SD bands
+      if (!is.null(config$sd_band)) {
+        for (i in seq_len(config$sd_band)) {
+          plot <- plot +
+            annotate("rect", xmin = -Inf, xmax = Inf, ymin = i, ymax = Inf, fill = "#542788", alpha = 0.3) +
+            annotate("rect", xmin = -Inf, xmax = Inf, ymin = -i, ymax = -Inf, fill = "orange", alpha = 0.3) +
+            geom_hline(yintercept = c(-i, i), color = "gray")
+        }
+      }
+
+      # Optionally add enhancer/suppressor count annotations
+      if (!is.null(config$enhancer_label)) {
+        plot <- plot + annotate("text", x = config$enhancer_label$x,
+          y = config$enhancer_label$y, label = config$enhancer_label$label) +
+          annotate("text", x = config$suppressor_label$x,
+            y = config$suppressor_label$y, label = config$suppressor_label$label)
+      }
+    }
+    
+    # Correlation plots
+    if (config$plot_type == "correlation") {
+      plot <- plot + geom_point(shape = 3, color = "gray70") +
+        geom_smooth(method = "lm", color = "tomato3") +
+        annotate("text", x = 0, y = 0, label = config$correlation_text)
+    }
+
+    # General scatter/boxplot/density handling
+    if (!is.null(config$y_var)) {
+      plot <- plot + aes(y = !!sym(config$y_var))
+      
+      y_mean_col <- paste0("mean_", config$y_var)
+      y_sd_col <- paste0("sd_", config$y_var)
+      
+      if (config$y_var == "delta_bg" && config$plot_type == "scatter") {
+        plot <- plot + geom_point(shape = 3, size = 0.2, position = "jitter") +
+          geom_errorbar(aes(ymin = !!sym(y_mean_col) - !!sym(y_sd_col),
+            ymax = !!sym(y_mean_col) + !!sym(y_sd_col)), width = 0.1) +
+          geom_point(aes(y = !!sym(y_mean_col)), size = 0.6)
+      } else if (config$error_bar %||% FALSE) {
+        plot <- plot +
+          geom_point(shape = 3, size = 0.2) +
+          geom_errorbar(aes(ymin = !!sym(y_mean_col) - !!sym(y_sd_col),
+            ymax = !!sym(y_mean_col) + !!sym(y_sd_col)), width = 0.1) +
+          geom_point(aes(y = !!sym(y_mean_col)), size = 0.6)
+      }
+    }
+
+    # Plot type selection
+    plot <- switch(config$plot_type,
+      "box" = plot + geom_boxplot(),
+      "density" = plot + geom_density(),
+      "bar" = plot + geom_bar(stat = "identity"),
+      plot + geom_point() + geom_smooth(method = "lm", se = FALSE))
+    
+    # Adding y-limits if provided
+    if (!is.null(config$ylim_vals)) {
+      plot <- plot + coord_cartesian(ylim = config$ylim_vals)
+    }
+
+    # Setting legend position, titles, and labels
+    legend_position <- config$legend_position %||% "bottom"
+    plot <- plot + ggtitle(config$title) + theme_Publication(legend_position = legend_position)
+    
+    if (!is.null(config$x_label)) plot <- plot + xlab(config$x_label)
+    if (!is.null(config$y_label)) plot <- plot + ylab(config$y_label)
+    
+    # Adding text annotations if provided
+    if (!is.null(config$annotations)) {
+      for (annotation in config$annotations) {
+        plot <- plot + annotate("text", x = annotation$x, y = annotation$y, label = annotation$label)
+      }
+    }
+    
+    return(plot)
+  })
+  
+  # If grid_layout is provided, arrange plots in a grid and save in a single PDF
+  if (!is.null(grid_layout)) {
+    pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9)
+    
+    # Loop through plots in chunks defined by ncol and nrow
+    for (start_idx in seq(1, length(plots), by = grid_layout$ncol * grid_layout$nrow)) {
+      end_idx <- min(start_idx + grid_layout$ncol * grid_layout$nrow - 1, length(plots))
+      plot_subset <- plots[start_idx:end_idx]
+      
+      # Arrange plots in a grid
+      do.call(grid.arrange, c(plot_subset, ncol = grid_layout$ncol, nrow = grid_layout$nrow))
+    }
+    
+    dev.off()
+    
+    # Save as HTML (optional)
+    plotly_plots <- lapply(plots, function(plot) {
+      suppressWarnings(ggplotly(plot) %>% layout(legend = list(orientation = "h")))
+    })
+    combined_plot <- subplot(plotly_plots, nrows = grid_layout$nrow, margin = 0.05)
+    saveWidget(combined_plot, file = file.path(output_dir, paste0(file_name, "_grid.html")), selfcontained = TRUE)
+    
+  } else {
+    # Save individual plots to PDF
+    pdf(file.path(output_dir, paste0(file_name, ".pdf")), width = 14, height = 9)
+    lapply(plots, print)
+    dev.off()
+    
+    # Convert plots to plotly and save as HTML
+    plotly_plots <- lapply(plots, function(plot) {
+      suppressWarnings(ggplotly(plot) %>% layout(legend = list(orientation = "h")))
+    })
+    combined_plot <- subplot(plotly_plots, nrows = length(plotly_plots), margin = 0.05)
+    saveWidget(combined_plot, file = file.path(output_dir, paste0(file_name, ".html")), selfcontained = TRUE)
+  }
+}
+
+generate_interaction_plot_configs <- function(df, variables) {
+  plot_configs <- list()
+
+  for (variable in variables) {
+    # Define the y-limits based on the variable being plotted
+    ylim_vals <- switch(variable,
+      "L" = c(-65, 65),
+      "K" = c(-65, 65),
+      "r" = c(-0.65, 0.65),
+      "AUC" = c(-6500, 6500)
+    )
+
+    # Dynamically generate the column names for standard deviation and delta
+    wt_sd_col <- paste0("WT_sd_", variable)
+    delta_var <- paste0("Delta_", variable)
+    z_shift <- paste0("Z_Shift_", variable)
+    z_lm <- paste0("Z_lm_", variable)
+
+    # Set annotations for ZShift, Z lm Score, NG, DB, SM
+    annotations <- list(
+      list(x = 1, y = ifelse(variable == "L", 45, ifelse(variable == "K", 45,
+        ifelse(variable == "r", 0.45, 4500))), label = paste("ZShift =", round(df[[z_shift]], 2))),
+      list(x = 1, y = ifelse(variable == "L", 25, ifelse(variable == "K", 25,
+        ifelse(variable == "r", 0.25, 2500))), label = paste("lm ZScore =", round(df[[z_lm]], 2))),
+      list(x = 1, y = ifelse(variable == "L", -25, ifelse(variable == "K", -25,
+        ifelse(variable == "r", -0.25, -2500))), label = paste("NG =", df$NG)),
+      list(x = 1, y = ifelse(variable == "L", -35, ifelse(variable == "K", -35,
+        ifelse(variable == "r", -0.35, -3500))), label = paste("DB =", df$DB)),
+      list(x = 1, y = ifelse(variable == "L", -45, ifelse(variable == "K", -45,
+        ifelse(variable == "r", -0.45, -4500))), label = paste("SM =", df$SM))
+    )
+
+    # Add scatter plot configuration for this variable
+    plot_configs[[length(plot_configs) + 1]] <- list(
+      df = df,
+      x_var = "conc_num_factor",
+      y_var = delta_var,
+      plot_type = "scatter",
+      title = sprintf("%s      %s", df$OrfRep[1], df$Gene[1]),
+      ylim_vals = ylim_vals,
+      annotations = annotations,
+      error_bar = list(
+        ymin = 0 - (2 * df[[wt_sd_col]][1]),
+        ymax = 0 + (2 * df[[wt_sd_col]][1])
+      ),
+      x_breaks = unique(df$conc_num_factor),
+      x_labels = unique(as.character(df$conc_num)),
+      x_label = unique(df$Drug[1])
+    )
+
+    # Add box plot configuration for this variable
+    plot_configs[[length(plot_configs) + 1]] <- list(
+      df = df,
+      x_var = "conc_num_factor",
+      y_var = variable,
+      plot_type = "box",
+      title = sprintf("%s      %s (Boxplot)", df$OrfRep[1], df$Gene[1]),
+      ylim_vals = ylim_vals,
+      annotations = annotations,
+      error_bar = FALSE,  # Boxplots typically don't need error bars
+      x_breaks = unique(df$conc_num_factor),
+      x_labels = unique(as.character(df$conc_num)),
+      x_label = unique(df$Drug[1])
+    )
+  }
+
+  return(plot_configs)
+}
+
+generate_rank_plot_configs <- function(df, rank_var, zscore_var, label_prefix) {
+  configs <- list()
+  
+  for (sd_band in c(1, 2, 3)) {
+    # Annotated version
+    configs[[length(configs) + 1]] <- list(
+      df = df,
+      x_var = rank_var,
+      y_var = zscore_var,
+      plot_type = "rank",
+      title = paste("Average Z score vs. Rank for", label_prefix, "above", sd_band, "SD"),
+      sd_band = sd_band,
+      enhancer_label = list(
+        x = nrow(df) / 2, y = 10,
+        label = paste("Deletion Enhancers =", nrow(df[df[[zscore_var]] >= sd_band, ]))
+      ),
+      suppressor_label = list(
+        x = nrow(df) / 2, y = -10,
+        label = paste("Deletion Suppressors =", nrow(df[df[[zscore_var]] <= -sd_band, ]))
+      )
+    )
+    
+    # Non-annotated version
+    configs[[length(configs) + 1]] <- list(
+      df = df,
+      x_var = rank_var,
+      y_var = zscore_var,
+      plot_type = "rank",
+      title = paste("Average Z score vs. Rank for", label_prefix, "above", sd_band, "SD"),
+      sd_band = sd_band
+    )
+  }
+  
+  return(configs)
+}
+
+generate_correlation_plot_configs <- function(df, lm_list, lm_summaries) {
+  lapply(seq_along(lm_list), function(i) {
+    r_squared <- round(lm_summaries[[i]]$r.squared, 3)
+    list(
+      x_var = names(lm_list)[i][1],
+      y_var = names(lm_list)[i][2],
+      plot_type = "scatter",
+      title = paste("Correlation between", names(lm_list)[i][1], "and", names(lm_list)[i][2]),
+      annotations = list(list(x = 0, y = 0, label = paste("R-squared =", r_squared)))
+    )
+  })
+}
+
+# Adjust missing values and calculate ranks
+adjust_missing_and_rank <- function(df, variables) {
+
+  # Adjust missing values in Avg_Zscore and Z_lm columns, and apply rank to the specified variables
+  df <- df %>%
+    mutate(across(all_of(variables), list(
+      Avg_Zscore = ~ if_else(is.na(get(paste0("Avg_Zscore_", cur_column()))), 0.001, get(paste0("Avg_Zscore_", cur_column()))),
+      Z_lm = ~ if_else(is.na(get(paste0("Z_lm_", cur_column()))), 0.001, get(paste0("Z_lm_", cur_column()))),
+      Rank = ~ rank(get(paste0("Avg_Zscore_", cur_column()))),
+      Rank_lm = ~ rank(get(paste0("Z_lm_", cur_column())))
+    ), .names = "{fn}_{col}"))
+
+  return(df)
+}
+
+main <- function() {
+  lapply(names(args$experiments), function(exp_name) {
+    exp <- args$experiments[[exp_name]]
+    exp_path <- exp$path
+    exp_sd <- exp$sd
+    out_dir <- file.path(exp_path, "zscores")
+    out_dir_qc <- file.path(exp_path, "zscores", "qc")
+    dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
+    dir.create(out_dir_qc, recursive = TRUE, showWarnings = FALSE)
+    
+    # Load and process data
+    df <- load_and_process_data(args$easy_results_file, sd = exp_sd)
+    df <- update_gene_names(df, args$sgd_gene_list)
+    
+    max_conc <- max(df$conc_num_factor)
+    
+    # QC steps and filtering
+    df_above_tolerance <- df %>% filter(DB == 1)
+
+    # Calculate the half-medians for `L` and `K` for rows above tolerance
+    L_half_median <- (median(df_above_tolerance$L, na.rm = TRUE)) / 2
+    K_half_median <- (median(df_above_tolerance$K, na.rm = TRUE)) / 2
+
+    # Get the number of rows that are above tolerance
+    rows_to_remove <- nrow(df_above_tolerance)
+
+    # Set L, r, K, and AUC to NA for rows that are above tolerance
+    df_na <- df %>% mutate(across(c(L, r, AUC, K), ~ ifelse(DB == 1, NA, .)))
+
+    # Calculate summary statistics for all strains, including both background and the deletions
+    message("Calculating summary statistics for all strains")
+    variables <- c("L", "K", "r", "AUC")
+    ss <- calculate_summary_stats(df_na, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
+    summary_stats <- ss$summary_stats
+    df_na_stats <- ss$df_with_stats
+    write.csv(summary_stats, file = file.path(out_dir, "SummaryStats_ALLSTRAINS.csv"), row.names = FALSE)
+
+    print("Summary stats:")
+    print(head(summary_stats), width = 200)
+
+    # Remove rows with 0 values in L
+    df_no_zeros <- df_na %>% filter(L > 0)
+
+    # Additional filtering for non-finite values
+    df_na_filtered <- df_na %>%
+      filter(if_any(c(L), ~ !is.finite(.))) %>%
+      {
+        if (nrow(.) > 0) {
+          message("Removing non-finite rows:\n")
+          print(head(., n = 10))
+        }
+        df_na %>% filter(if_all(c(L), is.finite))
+      }
+
+    # Filter data within and outside 2SD
+    message("Filtering by 2SD of K")
+    df_na_within_2sd_k <- df_na_stats %>%
+      filter(K >= (mean_K - 2 * sd_K) & K <= (mean_K + 2 * sd_K))
+    df_na_outside_2sd_k <- df_na_stats %>%
+      filter(K < (mean_K - 2 * sd_K) | K > (mean_K + 2 * sd_K))
+
+    # Summary statistics for within and outside 2SD of K
+    message("Calculating summary statistics for L within 2SD of K")
+    # TODO We're omitting the original z_max calculation, not sure if needed?
+    ss <- calculate_summary_stats(df_na_within_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
+    l_within_2sd_k_stats <- ss$summary_stats
+    df_na_l_within_2sd_k_stats <- ss$df_with_stats
+    message("Calculating summary statistics for L outside 2SD of K")
+    ss <- calculate_summary_stats(df_na_outside_2sd_k, "L", group_vars = c("conc_num", "conc_num_factor"))
+    l_outside_2sd_k_stats <- ss$summary_stats
+    df_na_l_outside_2sd_k_stats <- ss$df_with_stats
+    # Write CSV files
+    write.csv(l_within_2sd_k_stats, file = file.path(out_dir_qc, "Max_Observed_L_Vals_for_spots_within_2sd_k.csv"), row.names = FALSE)
+    write.csv(l_outside_2sd_k_stats, file = file.path(out_dir, "Max_Observed_L_Vals_for_spots_outside_2sd_k.csv"), row.names = FALSE)
+
+    # Plots
+
+    # Print quality control graphs before removing data due to contamination and
+    # adjusting missing data to max theoretical values
+    l_vs_k_plots <- list(
+      list(df = df, x_var = "L", y_var = "K", plot_type = "scatter",
+        title = "Raw L vs K before QC",
+        color_var = "conc_num",
+        legend_position = "right"
+      )
+    )
+
+    above_threshold_plots <- list(
+      list(df = df_above_tolerance, x_var = "L", y_var = "K", plot_type = "scatter",
+        title = paste("Raw L vs K for strains above delta background threshold of", df_above_tolerance$delta_bg_tolerance[[1]], "or above"),
+        color_var = "conc_num",
+        annotations = list(
+          list(
+            x = L_half_median,
+            y = K_half_median,
+            label = paste("Strains above delta background tolerance =", nrow(df_above_tolerance))
+          )
+        ),
+        error_bar = FALSE,
+        legend_position = "right"
+      )
+    )
+
+    frequency_delta_bg_plots <- list(
+      list(df = df, x_var = "delta_bg", y_var = NULL, plot_type = "density",
+        title = "Density plot for Delta Background by Conc All Data",
+        color_var = "conc_num",
+        x_label = "Delta Background",
+        y_label = "Density",
+        error_bar = FALSE,
+        legend_position = "right"
+      ),
+      list(df = df, x_var = "delta_bg", y_var = NULL, plot_type = "bar",
+        title = "Bar plot for Delta Background by Conc All Data",
+        color_var = "conc_num",
+        x_label = "Delta Background",
+        y_label = "Count",
+        error_bar = FALSE,
+        legend_position = "right"
+      )
+    )
+
+    plate_analysis_plots <- list()
+    for (plot_type in c("scatter", "box")) {
+      variables <- c("L", "K", "r", "AUC", "delta_bg")
+      for (var in variables) {
+        for (stage in c("before", "after")) {
+          if (stage == "before") {
+            df_plot <- df
+          } else {
+            df_plot <- df_na # TODO use df_na_filtered if necessary
+          }
+          
+          # Set error_bar = TRUE only for scatter plots
+          error_bar <- ifelse(plot_type == "scatter", TRUE, FALSE)
+          
+          # Create the plot configuration
+          plot_config <- list(df = df_plot, x_var = "scan", y_var = var, plot_type = plot_type,
+            title = paste("Plate analysis by Drug Conc for", var, stage, "quality control"),
+            error_bar = error_bar, color_var = "conc_num")
+
+          plate_analysis_plots <- append(plate_analysis_plots, list(plot_config))
+        }
+      }
+    }
+
+    plate_analysis_no_zero_plots <- list()
+      for (plot_type in c("scatter", "box")) {
+        variables <- c("L", "K", "r", "AUC", "delta_bg")
+        for (var in variables) {
+            
+          # Set error_bar = TRUE only for scatter plots
+          error_bar <- ifelse(plot_type == "scatter", TRUE, FALSE)
+          
+          # Create the plot configuration
+          plot_config <- list(
+            df = df_no_zeros,
+            x_var = "scan",
+            y_var = var,
+            plot_type = plot_type,
+            title = paste("Plate analysis by Drug Conc for", var, "after quality control"),
+            error_bar = error_bar,
+            color_var = "conc_num"
+          )
+          plate_analysis_plots <- append(plate_analysis_plots, list(plot_config))
+        }
+      }
+
+    l_outside_2sd_k_plots <- list(
+      list(df = X_outside_2SD_K, x_var = "l", y_var = "K", plot_type = "scatter",
+        title = "Raw L vs K for strains falling outside 2SD of the K mean at each Conc",
+        color_var = "conc_num",
+        legend_position = "right"
+      )
+    )
+
+    delta_bg_outside_2sd_k_plots <- list(
+      list(df = X_outside_2SD_K, x_var = "delta_bg", y_var = "K", plot_type = "scatter",
+        title = "Delta Background vs K for strains falling outside 2SD of the K mean at each Conc",
+        color_var = "conc_num",
+        legend_position = "right"
+      )
+    )
+
+    # Generate and save plots for each QC step
+    message("Generating QC plots")
+    generate_and_save_plots(out_dir_qc, "L_vs_K_before_quality_control", l_vs_k_plots)
+    generate_and_save_plots(out_dir_qc, "L_vs_K_above_threshold", above_threshold_plots)
+    generate_and_save_plots(out_dir_qc, "frequency_delta_background", frequency_delta_bg_plots)
+    generate_and_save_plots(out_dir_qc, "plate_analysis", plate_analysis_plots)
+    generate_and_save_plots(out_dir_qc, "plate_analysis_no_zeros", plate_analysis_no_zeros_plots)
+    generate_and_save_plots(out_dir_qc, "L_vs_K_for_strains_2SD_outside_mean_K", l_outside_2sd_k_plots)
+    generate_and_save_plots(out_dir_qc, "delta_background_vs_K_for_strains_2sd_outside_mean_K", delta_bg_outside_2sd_k_plots)
+
+    # Clean up
+    rm(df, df_above_tolerance, df_no_zeros)
+
+    # TODO: Originally this filtered L NA's
+    # Let's try to avoid for now since stats have already been calculated
+
+    # Process background strains
+    bg_strains <- c("YDL227C")
+    lapply(bg_strains, function(strain) {
+      
+      message("Processing background strain: ", strain)
+      
+      # Handle missing data by setting zero values to NA
+      # and then removing any rows with NA in L col
+      df_bg <- df_na %>%
+        filter(OrfRep == strain) %>%
+        mutate(
+          L = if_else(L == 0, NA, L),
+          K = if_else(K == 0, NA, K),
+          r = if_else(r == 0, NA, r),
+          AUC = if_else(AUC == 0, NA, AUC)
+        ) %>%
+        filter(!is.na(L))
+      
+      # Recalculate summary statistics for the background strain
+      message("Calculating summary statistics for background strain")
+      ss <- calculate_summary_stats(df_bg, variables, group_vars = c("OrfRep", "conc_num", "conc_num_factor"))
+      summary_stats_bg <- ss$summary_stats
+      df_bg_stats <- ss$df_with_stats
+      write.csv(summary_stats_bg,
+        file = file.path(out_dir, paste0("SummaryStats_BackgroundStrains_", strain, ".csv")),
+        row.names = FALSE)
+      
+      # Filter reference and deletion strains
+      # Formerly X2_RF (reference strains)
+      df_reference <- df_na_stats %>%
+        filter(OrfRep == strain) %>%
+        mutate(SM = 0)
+      
+      # Formerly X2 (deletion strains)
+      df_deletion <- df_na_stats %>%
+        filter(OrfRep != strain) %>%
+        mutate(SM = 0)
+
+      # Set the missing values to the highest theoretical value at each drug conc for L
+      # Leave other values as 0 for the max/min
+      reference_strain <- df_reference %>%
+        group_by(conc_num) %>%
+        mutate(
+          max_l_theoretical = max(max_L, na.rm = TRUE),
+          L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L),
+          SM = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, 1, SM),
+          L = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, max_l_theoretical, L)) %>%
+        ungroup()
+
+      # Ditto for deletion strains
+      deletion_strains <- df_deletion %>%
+        group_by(conc_num) %>%
+        mutate(
+          max_l_theoretical = max(max_L, na.rm = TRUE),
+          L = ifelse(L == 0 & !is.na(L) & conc_num > 0, max_l_theoretical, L),
+          SM = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, 1, SM),
+          L = ifelse(L >= max_l_theoretical & !is.na(L) & conc_num > 0, max_l_theoretical, L)) %>%
+        ungroup()
+
+      # Calculate interactions
+      variables <- c("L", "K", "r", "AUC")
+      message("Calculating interaction scores")
+      print("Reference strain:")
+      print(head(reference_strain))
+      reference_results <- calculate_interaction_scores(reference_strain, max_conc, variables)
+      print("Deletion strains:")
+      print(head(deletion_strains))
+      deletion_results <- calculate_interaction_scores(deletion_strains, max_conc, variables)
+      
+      zscores_calculations_reference <- reference_results$zscores_calculations
+      zscores_interactions_reference <- reference_results$zscores_interactions
+      zscores_calculations <- deletion_results$zscores_calculations
+      zscores_interactions <- deletion_results$zscores_interactions
+      
+      # Writing Z-Scores to file
+      write.csv(zscores_calculations_reference, file = file.path(out_dir, "RF_ZScores_Calculations.csv"), row.names = FALSE)
+      write.csv(zscores_calculations, file = file.path(out_dir, "ZScores_Calculations.csv"), row.names = FALSE)
+      write.csv(zscores_interactions_reference, file = file.path(out_dir, "RF_ZScores_Interaction.csv"), row.names = FALSE)
+      write.csv(zscores_interactions, file = file.path(out_dir, "ZScores_Interaction.csv"), row.names = FALSE)
+
+      # Create interaction plots
+      reference_plot_configs <- generate_interaction_plot_configs(df_reference, variables)
+      deletion_plot_configs <- generate_interaction_plot_configs(df_deletion, variables)
+      generate_and_save_plots(out_dir, "RF_interactionPlots", reference_plot_configs, grid_layout = list(ncol = 4, nrow = 3))
+      generate_and_save_plots(out_dir, "InteractionPlots", deletion_plot_configs, grid_layout = list(ncol = 4, nrow = 3))
+
+      # Define conditions for enhancers and suppressors
+      # TODO Add to study config file?
+      threshold <- 2
+      enhancer_condition_L <- zscores_interactions$Avg_Zscore_L >= threshold
+      suppressor_condition_L <- zscores_interactions$Avg_Zscore_L <= -threshold
+      enhancer_condition_K <- zscores_interactions$Avg_Zscore_K >= threshold
+      suppressor_condition_K <- zscores_interactions$Avg_Zscore_K <= -threshold
+      
+      # Subset data
+      enhancers_L <- zscores_interactions[enhancer_condition_L, ]
+      suppressors_L <- zscores_interactions[suppressor_condition_L, ]
+      enhancers_K <- zscores_interactions[enhancer_condition_K, ]
+      suppressors_K <- zscores_interactions[suppressor_condition_K, ]
+      
+      # Save enhancers and suppressors
+      message("Writing enhancer/suppressor csv files")
+      write.csv(enhancers_L, file = file.path(out_dir, "ZScores_Interaction_Deletion_Enhancers_L.csv"), row.names = FALSE)
+      write.csv(suppressors_L, file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_L.csv"), row.names = FALSE)
+      write.csv(enhancers_K, file = file.path(out_dir, "ZScores_Interaction_Deletion_Enhancers_K.csv"), row.names = FALSE)
+      write.csv(suppressors_K, file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K.csv"), row.names = FALSE)
+      
+      # Combine conditions for enhancers and suppressors
+      enhancers_and_suppressors_L <- zscores_interactions[enhancer_condition_L | suppressor_condition_L, ]
+      enhancers_and_suppressors_K <- zscores_interactions[enhancer_condition_K | suppressor_condition_K, ]
+      
+      # Save combined enhancers and suppressors
+      write.csv(enhancers_and_suppressors_L,
+        file = file.path(out_dir, "ZScores_Interaction_Deletion_Enhancers_and_Suppressors_L.csv"), row.names = FALSE)
+      write.csv(enhancers_and_suppressors_K,
+        file = file.path(out_dir, "ZScores_Interaction_Deletion_Enhancers_and_Suppressors_K.csv"), row.names = FALSE)
+      
+      # Handle linear model based enhancers and suppressors
+      lm_threshold <- 2
+      enhancers_lm_L <- zscores_interactions[zscores_interactions$Z_lm_L >= lm_threshold, ]
+      suppressors_lm_L <- zscores_interactions[zscores_interactions$Z_lm_L <= -lm_threshold, ]
+      enhancers_lm_K <- zscores_interactions[zscores_interactions$Z_lm_K >= lm_threshold, ]
+      suppressors_lm_K <- zscores_interactions[zscores_interactions$Z_lm_K <= -lm_threshold, ]
+      
+      # Save linear model based enhancers and suppressors
+      message("Writing linear model enhancer/suppressor csv files")
+      write.csv(enhancers_lm_L,
+        file = file.path(out_dir, "ZScores_Interaction_Deletion_Enhancers_L_lm.csv"), row.names = FALSE)
+      write.csv(suppressors_lm_L,
+        file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_L_lm.csv"), row.names = FALSE)
+      write.csv(enhancers_lm_K,
+        file = file.path(out_dir, "ZScores_Interaction_Deletion_Enhancers_K_lm.csv"), row.names = FALSE)
+      write.csv(suppressors_lm_K,
+        file = file.path(out_dir, "ZScores_Interaction_Deletion_Suppressors_K_lm.csv"), row.names = FALSE)
+
+      zscores_interactions_adjusted <- adjust_missing_and_rank(zscores_interactions)
+      
+      # Generate all rank plot configurations for L and K
+      rank_plot_configs <- c(
+        generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_L", "Avg_Zscore_L", "L"),
+        generate_rank_plot_configs(zscores_interactions_adjusted, "Rank_K", "Avg_Zscore_K", "K")
+      )
+
+      # Generate and save rank plots
+      generate_and_save_plots(output_dir = out_dir, file_name = "RankPlots",
+        plot_configs = rank_plot_config, grid_layout = list(ncol = 3, nrow = 2))
+
+      # # Correlation plots
+      # lm_list <- list(
+      #   lm(Z_lm_K ~ Z_lm_L, data = zscores_interactions_filtered),
+      #   lm(Z_lm_r ~ Z_lm_L, data = zscores_interactions_filtered),
+      #   lm(Z_lm_AUC ~ Z_lm_L, data = zscores_interactions_filtered),
+      #   lm(Z_lm_r ~ Z_lm_K, data = zscores_interactions_filtered),
+      #   lm(Z_lm_AUC ~ Z_lm_K, data = zscores_interactions_filtered),
+      #   lm(Z_lm_AUC ~ Z_lm_r, data = zscores_interactions_filtered)
+      # )
+
+      lm_summaries <- lapply(lm_list, summary)
+      correlation_plot_configs <- generate_correlation_plot_configs(zscores_interactions_filtered, lm_list, lm_summaries)
+      generate_and_save_plots(zscores_interactions_filtered, output_dir, correlation_plot_configs)
+    })
+  })
+}
+main()
--- a/qhtcp-workflow/apps/r/calculate_pairwise_lk.R
+++ b/qhtcp-workflow/apps/r/calculate_pairwise_lk.R
@@ -0,0 +1,209 @@
+suppressMessages({
+  library("ggplot2")
+  library("plotly")
+  library("htmlwidgets")
+  library("extrafont")
+  library("grid")
+  library("ggthemes")
+})
+
+# Constants for configuration
+PLOT_WIDTH <- 12
+PLOT_HEIGHT <- 8
+BASE_SIZE <- 14
+
+options(warn = 2, max.print = 100)
+
+# Parse arguments
+parse_arguments <- function() {
+  if (interactive()) {
+    args <- c(
+      "Exp1",
+      "/path/to/exp1_file.csv",
+      "Exp2",
+      "/path/to/exp2_file.csv",
+      "/path/to/output_dir"
+    )
+  } else {
+    args <- commandArgs(trailingOnly = TRUE)
+  }
+  list(
+    exp1_name = args[1],
+    exp1_file = normalizePath(args[2], mustWork = TRUE),
+    exp2_name = args[3],
+    exp2_file = normalizePath(args[4], mustWork = TRUE),
+    output_dir = normalizePath(args[5], mustWork = FALSE)
+  )
+}
+
+args <- parse_arguments()
+
+# Create output directories if they don't exist
+pairDirL <- file.path(args$output_dir, paste0("PairwiseCompareL_", args$exp1_name, "-", args$exp2_name))
+pairDirK <- file.path(args$output_dir, paste0("PairwiseCompareK_", args$exp1_name, "-", args$exp2_name))
+dir.create(pairDirL, showWarnings = FALSE, recursive = TRUE)
+dir.create(pairDirK, showWarnings = FALSE, recursive = TRUE)
+
+# Define themes and scales
+theme_publication <- function(base_size = BASE_SIZE, base_family = "sans") {
+  theme_foundation(base_size = base_size, base_family = base_family) +
+    theme(
+      plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
+      panel.background = element_rect(colour = NA),
+      plot.background = element_rect(colour = NA),
+      panel.border = element_rect(colour = NA),
+      axis.title = element_text(face = "bold", size = rel(1)),
+      axis.line = element_line(colour = "black"),
+      panel.grid.major = element_line(colour = "#f0f0f0"),
+      panel.grid.minor = element_blank(),
+      legend.position = "bottom",
+      legend.direction = "horizontal",
+      legend.key.size = unit(0.2, "cm"),
+      plot.margin = unit(c(10, 5, 5, 5), "mm"),
+      strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
+      strip.text = element_text(face = "bold")
+    )
+}
+
+theme_publication_legend_right <- function(base_size = BASE_SIZE, base_family = "sans") {
+  theme_publication(base_size, base_family) +
+    theme(
+      legend.position = "right",
+      legend.direction = "vertical",
+      legend.key.size = unit(0.5, "cm")
+    )
+}
+
+scale_fill_publication <- function(...) {
+  discrete_scale("fill", "Publication", manual_pal(values = c(
+    "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
+    "#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
+  )), ...)
+}
+
+scale_colour_publication <- function(...) {
+  discrete_scale("colour", "Publication", manual_pal(values = c(
+    "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
+    "#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
+  )), ...)
+}
+
+# Read and merge data
+load_and_merge_data <- function(file1, file2) {
+  df1 <- read.csv(file = file1, stringsAsFactors = FALSE, header = TRUE)
+  df2 <- read.csv(file = file2, stringsAsFactors = FALSE, header = TRUE)
+  merge(df1, df2, by = "Term_Avg", all = TRUE, suffixes = c("_df1", "_df2"))
+}
+
+# Generate a plot and save to PDF and HTML
+generate_plot <- function(data, x_var, y_var, color_var, title, file_path, theme_function) {
+  gg <- ggplot(data = data, aes_string(
+    x = x_var,
+    y = y_var,
+    color = color_var
+  )) +
+    xlab(paste("GO Term Avg lm Z for", args$exp1_name)) +
+    geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
+    geom_point(shape = 3) +
+    ylab(paste("GO Term Avg lm Z for", args$exp2_name)) +
+    ggtitle(title) +
+    theme_function()
+
+  pdf(file.path(file_path, ".pdf"), width = PLOT_WIDTH, height = PLOT_HEIGHT)
+  print(gg)
+  dev.off()
+
+  pgg <- ggplotly(gg)
+  htmlwidgets::saveWidget(pgg, file.path(file_path, ".html"))
+}
+
+# Identify and annotate specific interactions
+annotate_interactions <- function(df, exp1_name, exp2_name, suffix) {
+  df$Overlap_Avg <- NA
+  interactions <- list(
+    "df1_Specific_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] < 2),
+    "df1_Specific_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] > -2),
+    "df2_Specific_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] < 2),
+    "df2_Specific_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] > -2),
+    "Overlap_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] >= 2),
+    "Overlap_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] <= -2),
+    "df2_Aggravators_df1_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] <= -2),
+    "df2_Alleviators_df1_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] >= 2)
+  )
+  annotation <- list(
+    df1_Specific_Aggravators = paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"),
+    df1_Specific_Alleviators = paste(exp1_name, "Specific_Deletion_Suppressors", sep = "_"),
+    df2_Specific_Aggravators = paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"),
+    df2_Specific_Alleviators = paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"),
+    Overlap_Aggravators = "Overlapping_Deletion_Enhancers",
+    Overlap_Alleviators = "Overlapping_Deletion_Suppressors",
+    df2_Aggravators_df1_Alleviators = paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"),
+    df2_Alleviators_df1_Aggravators = paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_")
+  )
+  for (key in names(interactions)) {
+    try(df$Overlap_Avg[interactions[[key]]] <- annotation[[key]])
+  }
+  df
+}
+
+# Rank and filter data
+rank_and_filter_data <- function(df, suffix) {
+  z1 <- df
+  z1[[paste0("L_Subtract_SD_", suffix, "_df1")]] <- z1[[paste0("Z_lm_", suffix, "_Avg_df1")]] - z1[[paste0("Z_lm_", suffix, "_SD_df1")]]
+  z1[[paste0("L_Subtract_SD_", suffix, "_df2")]] <- z1[[paste0("Z_lm_", suffix, "_Avg_df2")]] - z1[[paste0("Z_lm_", suffix, "_SD_df2")]]
+  
+  z2 <- df
+  z2[[paste0("L_Subtract_SD_", suffix, "_df1")]] <- z2[[paste0("Z_lm_", suffix, "_Avg_df1")]] + z2[[paste0("Z_lm_", suffix, "_SD_df1")]]
+  z2[[paste0("L_Subtract_SD_", suffix, "_df2")]] <- z2[[paste0("Z_lm_", suffix, "_Avg_df2")]] + z2[[paste0("Z_lm_", suffix, "_SD_df2")]]
+  
+  df_above_threshold <- df[!is.na(df$Overlap_Avg), ]
+  df_above_threshold$df1_Rank <- rank(-df_above_threshold[[paste0("Z_lm_", suffix, "_Avg_df1")]], ties.method = "random")
+  df_above_threshold$df2_Rank <- rank(-df_above_threshold[[paste0("Z_lm_", suffix, "_Avg_df2")]], ties.method = "random")
+  
+  list(z1 = z1, z2 = z2, df_above_threshold = df_above_threshold)
+}
+
+# Main execution function for a pairwise comparison
+run_pairwise_comparison <- function(suffix, dir) {
+  df <- load_and_merge_data(args$exp1_file, args$exp2_file)
+  
+  # Generate initial ontology-based plot
+  generate_plot(df,
+    paste0("Z_lm_", suffix, "_Avg_df1"), paste0("Z_lm_", suffix, "_Avg_df2"), "Ontology_Avg_df1",
+    paste("Comparing Average GO Term Z lm for", args$exp1_name, "vs.", args$exp2_name),
+    file.path(dir, paste0("Scatter_lm_GTA_Averages_", args$exp1_name, "_vs_", args$exp2_name, "_All_ByOntology")),
+    theme_publication_legend_right)
+
+  # Annotate interactions and generate overlap-based plot
+  df <- annotate_interactions(df, args$exp1_name, args$exp2_name, suffix)
+  ranks <- rank_and_filter_data(df, suffix)
+  
+  generate_plot(df,
+    paste0("Z_lm_", suffix, "_Avg_df1"),
+    paste0("Z_lm_", suffix, "_Avg_df2"),
+    "Overlap_Avg",
+    paste("Comparing Average GO Term Z lm for", args$exp1_name, "vs.", args$exp2_name),
+    file.path(dir, paste0("Scatter_lm_GTA_Averages_", args$exp1_name, "_vs_", args$exp2_name, "_All_ByOverlap")),
+    theme_publication_legend_right)
+
+  generate_plot(ranks$df_above_threshold,
+    paste0("Z_lm_", suffix, "_Avg_df1"),
+    paste0("Z_lm_", suffix, "_Avg_df2"),
+    "Overlap_Avg",
+    paste("Comparing Average GO Term Z lm for", args$exp1_name, "vs.", args$exp2_name, "Above Threshold"),
+    file.path(dir,
+      paste0("Scatter_lm_GTA_Averages_", args$exp1_name, "_vs_", args$exp2_name, "_All_ByOverlap_AboveThreshold")),
+    theme_publication_legend_right)
+  
+  # Save CSV files
+  write.csv(df, file.path(dir,
+    paste0("All_GTA_Avg_Scores_", args$exp1_name, "_vs_", args$exp2_name, ".csv")),
+    row.names = FALSE)
+  write.csv(ranks$df_above_threshold,
+    file.path(dir, paste0("AboveThreshold_GTA_Avg_Scores_", args$exp1_name, "_vs_", args$exp2_name, ".csv")),
+    row.names = FALSE)
+}
+
+# Execute Pairwise L and Pairwise K comparisons
+run_pairwise_comparison("L", pairDirL)
+run_pairwise_comparison("K", pairDirK)
--- a/qhtcp-workflow/apps/r/createHeatMaps.R
+++ b/qhtcp-workflow/apps/r/createHeatMaps.R
@@ -0,0 +1,305 @@
+#!/usr/bin/env Rscript
+# This script will make heatmaps for the REMc analysis
+# need to give the input "finalTable.csv" file after running REMc generated by eclipse
+
+library(RColorBrewer)
+library(gplots)
+
+args <- commandArgs(TRUE)
+
+# Set output dir
+if (length(args) >= 1) {
+  input_finalTable <- file.path(args[1])
+} else {
+  input_finalTable <- "/REMcHeatmaps/REMcWithShift.csv" # for legacy workflow
+}
+
+if (length(args) >= 2) {
+  outDir <- file.path(args[2])
+} else {
+  outDir <- "/REMcHeatmaps/REMcWithShift.csv" # for legacy workflow
+}
+
+hmapfile <- data.frame(read.csv(file = input_finalTable, header = TRUE, sep = ",", stringsAsFactors = FALSE))
+# set NAs to NA
+hmapfile[hmapfile == -100] <- NA
+hmapfile[hmapfile == 100] <- NA
+hmapfile[hmapfile == 0.001] <- NA
+hmapfile[hmapfile == -0.001] <- NA
+
+# select the number of rows based on the number of genes
+num_total_genes <- length(hmapfile[, 1])
+
+# Break out the cluster names so each part of the cluster origin can be accessed
+# line below removed because it adds to many genes to clusters when going past 1-0-10
+# since it cannot differentiate between 1-0-1 and 1-0-10 when using grepl.
+# hmapfile$cluster.origin = gsub(" ","",x=hmapfile$cluster.origin)
+
+hmapfile$cluster.origin <- gsub(";", " ;", x = hmapfile$cluster.origin)
+hmapfile$cluster.origin <- strsplit(hmapfile$cluster.origin, ";")
+#use tail(x,n) for accessing the outward most cluster
+
+clust_rounds <- 0
+for (i in 1:num_total_genes) {
+  if (length(hmapfile$cluster.origin[[i]]) > clust_rounds) {
+    clust_rounds <- length(hmapfile$cluster.origin[[i]])
+  }
+}
+
+unique_clusts <- unique(hmapfile$cluster.origin[1:num_total_genes])
+unique_clusts <- unique_clusts[unique_clusts != " "]
+
+# Select only the unique cluster names
+unique_clusts <- sort(unique(unlist(unique_clusts, use.names = FALSE)), decreasing = FALSE)
+num_unique_clusts <- length(unique_clusts)
+
+# Base the color key on a statistical analysis of the L and K data
+# need to create "breaks" to set the color key, need to have 12 different breaks (for 11 colors)
+# scale() will calculate the mean and standard deviation of the entire vector,
+# then "scale" each element by those values by subtracting the mean and dividing by the sd.
+# hmapfile[,4:(length(hmapfile[1,]) - 2)] <- scale(hmapfile[,4:(length(hmapfile[1,]) - 2)])
+# change so that the L data is multiplied to be on the same scale as the K data
+
+KEY_MIN <- 0
+KEY_MAX <- 0
+K_MIN <- 0
+L_MAX <- 0
+KcolumnValues <- vector()
+LcolumnValues <- vector()
+
+for (i in 4:(length(hmapfile[1, ]) - 2)){
+  if (grepl("_Z_lm_K", colnames(hmapfile)[i], fixed = TRUE) == TRUE) {
+    KcolumnValues <- append(KcolumnValues, i)
+  }
+  if (grepl("_Z_lm_L", colnames(hmapfile)[i], fixed = TRUE) == TRUE) {
+    LcolumnValues <- append(LcolumnValues, i)
+  }
+}
+
+# L_MAX <- quantile(hmapfile[,LcolumnValues],c(0,.01,.5,.99,1),na.rm=TRUE)[4]
+# K_MIN <- quantile(hmapfile[,KcolumnValues],c(0,.01,.5,.99,1),na.rm=TRUE)[2]
+# L_MAX <- quantile(hmapfile[,LcolumnValues],c(0,.01,.5,.975,1),na.rm=TRUE)[4]
+# K_MIN <- quantile(hmapfile[,KcolumnValues],c(0,.025,.5,.99,1),na.rm=TRUE)[2]
+
+# Z scores are
+L_MAX <- 12
+K_MIN <- -12
+
+# L_Multiplier <- as.numeric(abs(K_MIN/L_MAX))
+# hmapfile[,LcolumnValues] <- hmapfile[,LcolumnValues] * L_Multiplier
+
+# if(grepl("SHIFT",colnames(hmapfile)[4],fixed=TRUE) == TRUE){
+#  print("FOUND SHIFT VALUES")
+#  hmapfile[,(LcolumnValues - 1)] <- hmapfile[,(LcolumnValues-1)] * L_Multiplier
+# }
+
+# KEY_MAX <- as.numeric(L_MAX * L_Multiplier)
+# KEY_MIN <- as.numeric(K_MIN)
+
+KEY_MAX <- as.numeric(L_MAX)
+KEY_MIN <- as.numeric(K_MIN)
+
+print(KEY_MIN)
+print(L_MAX)
+# print(L_Multiplier)
+
+colormapbreaks <- c(KEY_MIN, KEY_MIN * (5 / 6), KEY_MIN * (4 / 6), KEY_MIN * (3 / 6), KEY_MIN * (2 / 6),
+  KEY_MIN * (1 / 6), KEY_MAX * (1 / 6), KEY_MAX * (2 / 6), KEY_MAX * (3 / 6), KEY_MAX * (4 / 6), KEY_MAX * (5 / 6), KEY_MAX)
+# print(colormapbreaks)
+
+# Probably should give a way to detect shift in case that is is not in the first row... (maybe just grepl for the whole column name?)
+# However since also using this to amend the first part.
+# Could possibly identify all the ones that contain the word shift and then create an object containing just those numbers
+# then could just use these values and create spaces only between interaction values
+# possibly could get rid of redundant shift values if we don't want to view these
+# could we pool all the shift data/average it?
+if (grepl("Shift", colnames(hmapfile)[4], fixed = TRUE) == TRUE) {
+  even_columns <- seq(from = 2, to = (length(hmapfile[1, ]) - 7), by = 2)
+  # ev_repeat = rep("white",length(even_columns))
+  # ev_repeat = rep("red",(length(hmapfile[1,]) - 5))
+  # middle_col <- (length(hmapfile[1,]) - 5)/2
+  # ev_repeat[(middle_col/2)] <- "black"
+  # print(ev_repeat)
+}
+
+if (grepl("Shift", colnames(hmapfile)[4], fixed = TRUE) == FALSE) {
+  even_columns <- seq(from = 2, to = (length(hmapfile[1, ]) - 7), by = 1)
+  print("NO SHIFT VALS FOUND")
+}
+
+# FOR THIS SCRIPT ONLY (rap tem hu script)
+# even_columns <- c(2,5,7,10,12,15,17)
+
+# m <- 0
+colnames_edit <- as.character(colnames(hmapfile)[4:(length(hmapfile[1, ]) - 2)])
+# print(colnames_edit)
+for (i in 1:length(colnames_edit)) {
+  if (grepl("Shift", colnames_edit[i], fixed = TRUE) == TRUE) {
+    colnames_edit[i] <- ""
+    colnames_edit[i + 1] <- gsub(pattern = "_Z_lm_", replacement = " ", x = colnames_edit[i + 1])
+    try(colnames_edit[i + 1] <- gsub(pattern = "_", replacement = " ", x = colnames_edit[i + 1]))
+    
+    # INT_store <- strsplit(colnames_edit[i+1], "Z_lm")
+    # print(length(unlist(INT_store)))
+    # if(length(unlist(INT_store)) == 4){
+    #   colnames_edit[i+1] <- paste(unlist(INT_store)[1],unlist(INT_store)[2],unlist(INT_store)[3],sep=" ")
+    # }
+    # if(length(unlist(INT_store)) == 3){
+    #
+    #   colnames_edit[i+1] <- paste(unlist(INT_store)[1],unlist(INT_store)[2],sep=" ")
+    # }
+    # if(length(unlist(INT_store)) == 5){
+    #   colnames_edit[i+1] <- paste(unlist(INT_store)[1],unlist(INT_store)[2],unlist(INT_store)[3],unlist(INT_store)[4],sep=" ")
+    # }
+    # if(length(unlist(INT_store)) == 6){
+    #   colnames_edit[i+1] <- paste(unlist(INT_store)[1],unlist(INT_store)[2],unlist(INT_store)[6],sep=" ")
+    # }
+  }
+}
+print(colnames_edit)
+# break()
+# colnames_edit[5] <- "TEM HLEG K"
+# colnames_edit[10] <- "TEM HL K"
+# colnames_edit[15] <- "TEM HLEG L"
+# colnames_edit[20] <- "TEM HL L"
+
+# Create the heatmaps
+for (i in 1:num_unique_clusts) {
+  cluster <- unique_clusts[i]
+  cluster_data <- subset(hmapfile, grepl(cluster, cluster.origin))
+  cluster_length <- length(cluster_data[, 1])
+  if (cluster_length != 1) {
+    X0 <- as.matrix(cluster_data[, 4:(length(hmapfile[1, ]) - 2)])
+    if (cluster_length >= 2001) {
+      mypath <- file.path(outDir, paste0("cluster_", gsub(" ", "", cluster), ".pdf"))
+      pdf(file = mypath, height = 20, width = 15)
+      heatmap.2(
+        x = X0,
+        Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
+        dendrogram = "row", cexCol = 0.8, cexRow = 0.1, scale = "none",
+        breaks = colormapbreaks, symbreaks = FALSE, colsep = even_columns, sepcolor = "white", offsetCol = 0.1,
+        # zlim=c(-132, 132),
+        xlab = "Type of Media", ylab = "Gene Name",
+        # cellnote = round(X0,digits = 0), notecex = 0.1, key = TRUE,
+        keysize = 0.7, trace = "none", density.info = c("none"), margins = c(10, 8),
+        na.color = "red", col = brewer.pal(11, "PuOr"),
+        main = cluster,
+        # ColSideColors=ev_repeat,
+        labRow = as.character(cluster_data$Gene), labCol = colnames_edit
+      )
+      # abline(v=0.5467,col="black")
+      dev.off()
+    }
+    if (cluster_length >= 201 && cluster_length <= 2000) {
+      mypath <- file.path(outDir, paste0("cluster_", gsub(" ", "", cluster), ".pdf"))
+      pdf(file = mypath, height = 15, width = 12)
+      heatmap.2(
+        x = X0,
+        Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
+        dendrogram = "row", cexCol = 0.8, cexRow = 0.1, scale = "none",
+        breaks = colormapbreaks, symbreaks = FALSE, colsep = even_columns, sepcolor = "white", offsetCol = 0.1,
+        # zlim=c(-132,132),
+        xlab = "Type of Media", ylab = "Gene Name",
+        cellnote = round(X0, digits = 0), notecex = 0.1, key = TRUE,
+        keysize = 0.7, trace = "none", density.info = c("none"), margins = c(10, 8),
+        na.color = "red", col = brewer.pal(11, "PuOr"),
+        main = cluster,
+        labRow = as.character(cluster_data$Gene), labCol = colnames_edit
+      )
+      # abline(v=0.5316,col="black")
+      dev.off()
+    }
+    if (cluster_length >= 150 && cluster_length <= 200) {
+      mypath <- file.path(outDir, paste0("cluster_", gsub(" ", "", cluster), ".pdf"))
+      pdf(file = mypath, height = 12, width = 12)
+      heatmap.2(
+        x = X0,
+        Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
+        dendrogram = "row", cexCol = 0.8, cexRow = 0.1, scale = "none",
+        breaks = colormapbreaks, symbreaks = FALSE, colsep = even_columns, sepcolor = "white", offsetCol = 0.1,
+        # zlim=c(-132,132),
+        xlab = "Type of Media", ylab = "Gene Name",
+        cellnote = round(X0, digits = 0), notecex = 0.2, key = TRUE,
+        keysize = 1, trace = "none", density.info = c("none"), margins = c(10, 8),
+        na.color = "red", col = brewer.pal(11, "PuOr"),
+        main = cluster,
+        labRow = as.character(cluster_data$Gene), labCol = colnames_edit
+      )
+      dev.off()
+    }
+    if (cluster_length >= 101 && cluster_length <= 149) {
+      mypath <- file.path(outDir, paste0("cluster_", gsub(" ", "", cluster), ".pdf"))
+      pdf(file = mypath, mypath, height = 12, width = 12)
+      heatmap.2(
+        x = X0,
+        Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
+        dendrogram = "row", cexCol = 0.8, cexRow = 0.2, scale = "none",
+        breaks = colormapbreaks, symbreaks = FALSE, colsep = even_columns, sepcolor = "white", offsetCol = 0.1,
+        #zlim=c(-132,132),
+        xlab = "Type of Media", ylab = "Gene Name",
+        cellnote = round(X0, digits = 0), notecex = 0.3, key = TRUE,
+        keysize = 1, trace = "none", density.info = c("none"), margins = c(10, 8),
+        na.color = "red", col = brewer.pal(11, "PuOr"),
+        main = cluster,
+        labRow = as.character(cluster_data$Gene), labCol = colnames_edit
+      )
+      dev.off()
+    }
+    if (cluster_length >= 60 && cluster_length <= 100) {
+      mypath <- file.path(outDir, paste0("cluster_", gsub(" ", "", cluster), ".pdf"))
+      pdf(file = mypath, height = 12, width = 12)
+      heatmap.2(
+        x = X0,
+        Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
+        dendrogram = "row", cexCol = 0.8, cexRow = 0.4, scale = "none",
+        breaks = colormapbreaks, symbreaks = FALSE, colsep = even_columns, sepcolor = "white", offsetCol = 0.1,
+        # zlim=c(-132,132),
+        xlab = "Type of Media", ylab = "Gene Name",
+        cellnote = round(X0, digits = 0), notecex = 0.3, key = TRUE,
+        keysize = 1, trace = "none", density.info = c("none"), margins = c(10, 8),
+        na.color = "red", col = brewer.pal(11, "PuOr"),
+        main = cluster,
+        labRow = as.character(cluster_data$Gene), labCol = colnames_edit
+      )
+      dev.off()
+    }
+    if (cluster_length <= 59 && cluster_length >= 30) {
+      mypath <- file.path(outDir, paste0("cluster_", gsub(" ", "", cluster), ".pdf"))
+      pdf(file = mypath, height = 9, width = 12)
+      heatmap.2(
+        x = X0,
+        Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
+        dendrogram = "row", cexCol = 0.8, cexRow = 0.6, scale = "none",
+        breaks = colormapbreaks, symbreaks = FALSE, colsep = even_columns, sepcolor = "white", offsetCol = 0.1,
+        # zlim=c(-132,132),
+        xlab = "Type of Media", ylab = "Gene Name",
+        cellnote = round(X0, digits = 0), notecex = 0.4, key = TRUE,
+        keysize = 1, trace = "none", density.info = c("none"), margins = c(10, 8),
+        na.color = "red", col = brewer.pal(11, "PuOr"),
+        main = cluster,
+        labRow = as.character(cluster_data$Gene), labCol = colnames_edit
+      )
+      dev.off()
+    }
+    if (cluster_length <= 29) {
+      mypath <- file.path(outDir, paste0("cluster_", gsub(" ", "", cluster), ".pdf"))
+      pdf(file = mypath, height = 7, width = 12)
+      heatmap.2(
+        x = X0,
+        Rowv = TRUE, Colv = NA,
+        distfun = dist, hclustfun = hclust,
+        dendrogram = "row", cexCol = 0.8, cexRow = 0.9, scale = "none",
+        breaks = colormapbreaks, symbreaks = FALSE, colsep = even_columns, sepcolor = "white", offsetCol = 0.1,
+        # zlim=c(-132,132),
+        xlab = "Type of Media", ylab = "Gene Name",
+        cellnote = round(X0, digits = 0), notecex = 0.4, key = TRUE,
+        keysize = 1, trace = "none", density.info = c("none"), margins = c(10, 8),
+        na.color = "red", col = brewer.pal(11, "PuOr"),
+        main = cluster,
+        labRow = as.character(cluster_data$Gene), labCol = colnames_edit
+      )
+      dev.off()
+    }
+  }
+  # print(paste("FINISHED", "CLUSTER",cluster,sep=" "))
+}
--- a/qhtcp-workflow/apps/r/createHeatMapsHomology.R
+++ b/qhtcp-workflow/apps/r/createHeatMapsHomology.R
@@ -0,0 +1,364 @@
+#!/usr/bin/env Rscript
+
+library("RColorBrewer")
+library("gplots")
+library("tidyverse")
+
+args <- commandArgs(TRUE)
+
+# Define the output path for the heatmaps - create this folder first - in linux terminal in the working folder use > mkdir filename_heatmaps
+output_path <- file.path(Args[1])
+
+# Need to give the input "finalTable.csv" file after running REMc generated by eclipse
+final_table <- file.path(args[2])
+
+# Give the damp_list.txt as the third argument - will color the gene names differently
+damps <- file.path(Args[3])
+damp_list <- read.delim(file = damps, header = FALSE, stringsAsFactors = FALSE)
+
+# Give the yeast human homology mapping as the fourth argument - will add the genes to the finalTable and use info for heatmaps
+map_file <- file.path(Args[4])
+mapping <- read.csv(file = map_file, stringsAsFactors = FALSE)
+
+# Read in finalTablewithShift
+hmapfile <- data.frame(read.csv(file = final_table, header = TRUE, sep = ",", stringsAsFactors = FALSE))
+
+# Map the finalTable to the human homolog file
+hmapfile_map <- hmapfile
+# Match using OrfRep after dropping the _1 _2 _3 _4
+# But need to also account for some older files have ORF as column name rather than OrfRep in finalTable file
+if (colnames(hmapfile_map)[2] == "OrfRep") {
+  try(hmapfile_map$ORFMatch <- hmapfile_map$OrfRep)
+}
+if (colnames(hmapfile_map)[2] == "ORF") {
+  try(hmapfile_map$ORFMatch <- hmapfile_map$ORF)
+}
+hmapfile_map$ORFMatch <- gsub("_1", "", x = hmapfile_map$ORFMatch)
+hmapfile_map$ORFMatch <- gsub("_2", "", x = hmapfile_map$ORFMatch)
+hmapfile_map$ORFMatch <- gsub("_3", "", x = hmapfile_map$ORFMatch)
+hmapfile_map$ORFMatch <- gsub("_4", "", x = hmapfile_map$ORFMatch)
+
+# Join the hmapfile using
+hmapfile_w_homolog <- full_join(hmapfile_map, mapping, by = c("ORFMatch" = "ensembl_gene_id"))
+
+# Remove matches that are not from the finalTable
+hmapfile_w_homolog <- hmapfile_w_homolog[is.na(hmapfile_w_homolog$likelihood) == FASLE, ]
+
+# Write csv with all info from mapping file
+write.csv(hmapfile_w_homolog, file.path(output_path, paste0(final_table, "_WithHomologAll.csv")), row.names = FALSE)
+
+# Remove the non matches and output another mapping file - this is also one used to make heatmaps
+hmapfile_w_homolog <- hmapfile_w_homolog[is.na(hmapfile_w_homolog$external_gene_name_Human) == FALSE, ]
+write.csv(hmapfile_w_homolog, file.path(output_path, paste0(final_table, "_WithHomologMatchesOnly.csv"), row.names = FALSE))
+
+# Add human gene name to the Gene column
+hmapfile_w_homolog$Gene <- paste(hmapfile_w_homolog$Gene, hmapfile_w_homolog$external_gene_name_Human, sep = "/")
+
+# Only keep the finalTable file columns and the homology info
+hmap_len <- dim(hmapfile)[2]
+
+hmapfile_w_homolog_remake <-
+  cbind(hmapfile_w_homolog[, 1:hmap_len], hsapiens_homolog_orthology_type = hmapfile_w_homolog$hsapiens_homolog_orthology_type)
+hmapfile <- hmapfile_w_homolog_remake
+
+# Set NAs to NA
+hmapfile[hmapfile == -100] <- NA
+hmapfile[hmapfile == 100] <- NA
+hmapfile[hmapfile == 0.001] <- NA
+hmapfile[hmapfile == -0.001] <- NA
+
+# Select the number of rows based on the number of genes
+num_total_genes <- length(hmapfile[, 1])
+
+# Break out the cluster names so each part of the cluster origin can be accessed
+# Line below removed because it adds to many genes to clusters when going past 1-0-10
+# since it cannot differentiate between 1-0-1 and 1-0-10 when using grepl.
+# hmapfile$cluster.origin = gsub(" ","",x = hmapfile$cluster.origin)
+
+hmapfile$cluster.origin <- gsub(";", " ;", x = hmapfile$cluster.origin)
+hmapfile$cluster.origin <- strsplit(hmapfile$cluster.origin, ";")
+# use tail(x,n) for accessing the outward most cluster
+
+clust_rounds <- 0
+for (i in 1:num_total_genes) {
+  if (length(hmapfile$cluster.origin[[i]]) > clust_rounds) {
+    clust_rounds <- length(hmapfile$cluster.origin[[i]])
+  }
+}
+
+unique_clusts <- unique(hmapfile$cluster.origin[1:num_total_genes])
+unique_clusts <- unique_clusts[unique_clusts != " "]
+
+#select only the unique cluster names
+unique_clusts <- sort(unique(unlist(unique_clusts, use.names = FALSE)), decreasing = FALSE)
+num_unique_clusts <- length(unique_clusts)
+
+# Base the color key on a statistical analysis of the L and K data
+# need to create "breaks" to set the color key, need to have 12 different breaks (for 11 colors)
+# scale() will calculate the mean and standard deviation of the entire vector
+# then "scale" each element by those values by subtracting the mean and dividing by the sd
+
+# hmapfile[,4:(length(hmapfile[1,]) - 2)] <- scale(hmapfile[,4:(length(hmapfile[1,]) - 2)])
+
+# Change so that the L data is multiplied to be on the same scale as the K data
+
+KEY_MIN <- 0
+KEY_MAX <- 0
+K_MIN <- 0
+L_MAX <- 0
+KcolumnValues <- vector()
+LcolumnValues <- vector()
+
+for (i in 4:(length(hmapfile[1, ]) - 3)){
+  if (grepl("_Z_lm_K", colnames(hmapfile)[i], fixed = TRUE) == TRUE) {
+    KcolumnValues <- append(KcolumnValues, i)
+  }
+  if (grepl("_Z_lm_L", colnames(hmapfile)[i], fixed = TRUE) == TRUE) {
+    LcolumnValues <- append(LcolumnValues, i)
+  }
+}
+
+# L_MAX <- quantile(hmapfile[,LcolumnValues],c(0,.01,.5,.99,1),na.rm = TRUE)[4]
+# K_MIN <- quantile(hmapfile[,KcolumnValues],c(0,.01,.5,.99,1),na.rm = TRUE)[2]
+# L_MAX <- quantile(hmapfile[,LcolumnValues],c(0,.01,.5,.975,1),na.rm = TRUE)[4]
+# K_MIN <- quantile(hmapfile[,KcolumnValues],c(0,.025,.5,.99,1),na.rm = TRUE)[2]
+
+# Z scores are
+L_MAX <- 12
+K_MIN <- -12
+
+# L_Multiplier <- as.numeric(abs(K_MIN/L_MAX))
+# hmapfile[,LcolumnValues] <- hmapfile[,LcolumnValues] * L_Multiplier
+
+# if(grepl("SHIFT",colnames(hmapfile)[4],fixed = TRUE) == TRUE){
+#  print("FOUND SHIFT VALUES")
+#  hmapfile[,(LcolumnValues - 1)] <- hmapfile[,(LcolumnValues-1)] * L_Multiplier
+# }
+
+#KEY_MAX <- as.numeric(L_MAX * L_Multiplier)
+#KEY_MIN <- as.numeric(K_MIN)
+KEY_MAX <- as.numeric(L_MAX)
+KEY_MIN <- as.numeric(K_MIN)
+
+print(KEY_MIN)
+print(L_MAX)
+#print(L_Multiplier)
+
+colormapbreaks <- c(KEY_MIN, KEY_MIN * (5 / 6), KEY_MIN * (4 / 6), KEY_MIN * (3 / 6),
+  KEY_MIN * (2 / 6), KEY_MIN * (1 / 6), KEY_MAX * (1 / 6), KEY_MAX * (2 / 6),
+  KEY_MAX * (3 / 6), KEY_MAX * (4 / 6), KEY_MAX * (5 / 6), KEY_MAX)
+# print(colormapbreaks)
+
+# Probably should give a way to detect shift in case that is is not in the first row... (maybe just grepl for the whole column name?)
+# However since also using this to amend the first part.
+# Could possibly identify all the ones that contain the word shift and then create an object containing just those numbers
+# then could just use these values and create spaces only between interaction values
+# possibly could get rid of redundant shift values if we don't want to view these
+# could we pool all the shift data/average it?
+if (grepl("Shift", colnames(hmapfile)[4], fixed = TRUE) == TRUE) {
+  even_columns <- seq(from = 2, to = (length(hmapfile[1, ]) - 7), by = 2)
+  # ev_repeat = rep("white",length(even_columns))
+  # ev_repeat = rep("red",(length(hmapfile[1,]) - 5))
+  # middle_col <- (length(hmapfile[1,]) - 5)/2
+  # ev_repeat[(middle_col/2)] <- "black"
+  # print(ev_repeat)
+}
+
+if (grepl("Shift", colnames(hmapfile)[4], fixed = TRUE) == FALSE) {
+  even_columns <- seq(from = 2, to = (length(hmapfile[1, ]) - 7), by = 1)
+  print("NO SHIFT VALS FOUND")
+}
+
+# for this script only (rap tem hu script)
+# even_columns <- c(2,5,7,10,12,15,17)
+
+# m <- 0
+colnames_edit <- as.character(colnames(hmapfile)[4:(length(hmapfile[1, ]) - 3)])
+
+colnames(damp_list)[1] <- "ORF"
+hmapfile$damps <- "YKO"
+colnames(hmapfile)[2] <- "ORF"
+try(hmapfile[hmapfile$ORF %in% damp_list$ORF, ]$damps <- "YKD")
+# X <- X[order(X$damps,decreasing = TRUE),]
+hmapfile$color2 <- NA
+try(hmapfile[hmapfile$damps == "YKO", ]$color2 <- "black")
+try(hmapfile[hmapfile$damps == "YKD", ]$color2 <- "red")
+
+hmapfile$color <- NA
+try(hmapfile[hmapfile$hsapiens_homolog_orthology_type == "ortholog_many2many", ]$color <- "#F8766D")
+try(hmapfile[hmapfile$hsapiens_homolog_orthology_type == "ortholog_one2many", ]$color <- "#00BA38")
+try(hmapfile[hmapfile$hsapiens_homolog_orthology_type == "ortholog_one2one", ]$color <- "#619CFF")
+
+# print(colnames_edit)
+for (i in 1:length(colnames_edit)) {
+  if (grepl("Shift", colnames_edit[i], fixed = TRUE) == TRUE) {
+    colnames_edit[i] <- ""
+    colnames_edit[i + 1] <- gsub(pattern = "_Z_lm_", replacement = " ", x = colnames_edit[i + 1])
+    try(colnames_edit[i + 1] <- gsub(pattern = "_", replacement = " ", x = colnames_edit[i + 1]))
+    
+    # INT_store <- strsplit(colnames_edit[i+1], "Z_lm")
+    # print(length(unlist(INT_store)))
+    # if(length(unlist(INT_store)) == 4){
+    #   colnames_edit[i+1] <- paste(unlist(INT_store)[1],unlist(INT_store)[2],unlist(INT_store)[3],sep = " ")
+    # }
+    # if(length(unlist(INT_store)) == 3){
+    #
+    #   colnames_edit[i+1] <- paste(unlist(INT_store)[1],unlist(INT_store)[2],sep = " ")
+    # }
+    # if(length(unlist(INT_store)) == 5){
+    #   colnames_edit[i+1] <- paste(unlist(INT_store)[1],unlist(INT_store)[2],unlist(INT_store)[3],unlist(INT_store)[4],sep = " ")
+    # }
+    # if(length(unlist(INT_store)) == 6){
+    #   colnames_edit[i+1] <- paste(unlist(INT_store)[1],unlist(INT_store)[2],unlist(INT_store)[6],sep = " ")
+    # }
+
+  }
+}
+
+print(colnames_edit)
+# break()
+# colnames_edit[5] <- "TEM HLEG K"
+# colnames_edit[10] <- "TEM HL K"
+# colnames_edit[15] <- "TEM HLEG L"
+# colnames_edit[20] <- "TEM HL L"
+
+# Create the heatmaps
+for (i in 1:num_unique_clusts) {
+  cluster <- unique_clusts[i]
+  cluster_data <- subset(hmapfile, grepl(cluster, cluster.origin))
+  cluster_length <- length(cluster_data[, 1])
+  if (cluster_length != 1) {
+    X0 <- as.matrix(cluster_data[, 4:(length(hmapfile[1, ]) - 6)])
+    if (cluster_length >= 2001) {
+      mypath <- file.path(output_path, paste0("cluster_", gsub(" ", "", cluster), ".pdf"))
+      pdf(file = mypath, height = 20, width = 15)
+      heatmap.2(
+        x = X0,
+        Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
+        dendrogram = "row", cexCol = 0.8, cexRow = 0.1, scale = "none",
+        breaks = colormapbreaks, symbreaks = FALSE, colsep = even_columns, sepcolor = "white", offsetCol = 0.1,
+        # zlim = c(-132,132),
+        xlab = "Type of Media", ylab = "Gene Name",
+        # cellnote = round(X0,digits = 0), notecex = 0.1, key = TRUE,
+        keysize = 0.7, trace = "none", density.info = c("none"), margins = c(10, 8),
+        na.color = "red", col = brewer.pal(11, "PuOr"),
+        main = cluster,
+        # ColSideColors = ev_repeat,
+        labRow = as.character(cluster_data$Gene), labCol = colnames_edit, colRow = cluster_data$color2, RowSideColors = cluster_data$color
+      )
+      # abline(v = 0.5467,col = "black")
+      dev.off()
+    }
+    if (cluster_length >= 201 && cluster_length <= 2000) {
+      mypath <- file.path(output_path, paste0("cluster_", gsub(" ", "", cluster), ".pdf"))
+      pdf(file = mypath, height = 15, width = 12)
+      heatmap.2(
+        x = X0,
+        Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
+        dendrogram = "row", cexCol = 0.8, cexRow = 0.1, scale = "none",
+        breaks = colormapbreaks, symbreaks = FALSE, colsep = even_columns, sepcolor = "white", offsetCol = 0.1,
+        # zlim = c(-132,132),
+        xlab = "Type of Media", ylab = "Gene Name",
+        cellnote = round(X0, digits = 0), notecex = 0.1, key = TRUE,
+        keysize = 0.7, trace = "none", density.info = c("none"), margins = c(10, 8),
+        na.color = "red", col = brewer.pal(11, "PuOr"),
+        main = cluster,
+        labRow = as.character(cluster_data$Gene), labCol = colnames_edit, colRow = cluster_data$color2, RowSideColors = cluster_data$color
+      )
+      # abline(v = 0.5316,col = "black")
+      dev.off()
+    }
+    if (cluster_length >= 150 && cluster_length <= 200) {
+      mypath <- file.path(output_path, paste0("cluster_", gsub(" ", "", cluster), ".pdf"))
+      pdf(file = mypath, height = 12, width = 12)
+      heatmap.2(
+        x = X0,
+        Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
+        dendrogram = "row", cexCol = 0.8, cexRow = 0.1, scale = "none",
+        breaks = colormapbreaks, symbreaks = FALSE, colsep = even_columns, sepcolor = "white", offsetCol = 0.1,
+        # zlim = c(-132,132),
+        xlab = "Type of Media", ylab = "Gene Name",
+        cellnote = round(X0, digits = 0), notecex = 0.2, key = TRUE,
+        keysize = 1, trace = "none", density.info = c("none"), margins = c(10, 8),
+        na.color = "red", col = brewer.pal(11, "PuOr"),
+        main = cluster,
+        labRow = as.character(cluster_data$Gene), labCol = colnames_edit, colRow = cluster_data$color2, RowSideColors = cluster_data$color
+      )
+      dev.off()
+    }
+    if (cluster_length >= 101 && cluster_length <= 149) {
+      mypath <- file.path(output_path, paste0("cluster_", gsub(" ", "", cluster), ".pdf"))
+      pdf(file = mypath, height = 12, width = 12)
+      heatmap.2(
+        x = X0,
+        Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
+        dendrogram = "row", cexCol = 0.8, cexRow = 0.2, scale = "none",
+        breaks = colormapbreaks, symbreaks = FALSE, colsep = even_columns, sepcolor = "white", offsetCol = 0.1,
+        # zlim = c(-132,132),
+        xlab = "Type of Media", ylab = "Gene Name",
+        cellnote = round(X0, digits = 0), notecex = 0.3, key = TRUE,
+        keysize = 1, trace = "none", density.info = c("none"), margins = c(10, 8),
+        na.color = "red", col = brewer.pal(11, "PuOr"),
+        main = cluster,
+        labRow = as.character(cluster_data$Gene), labCol = colnames_edit, colRow = cluster_data$color2, RowSideColors = cluster_data$color
+      )
+      dev.off()
+    }
+    if (cluster_length >= 60 && cluster_length <= 100) {
+      mypath <- file.path(output_path, paste0("cluster_", gsub(" ", "", cluster), ".pdf"))
+      pdf(file = mypath, height = 12, width = 12)
+      heatmap.2(
+        x = X0,
+        Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
+        dendrogram = "row", cexCol = 0.8, cexRow = 0.4, scale = "none",
+        breaks = colormapbreaks, symbreaks = FALSE, colsep = even_columns, sepcolor = "white", offsetCol = 0.1,
+        # zlim = c(-132,132),
+        xlab = "Type of Media", ylab = "Gene Name",
+        cellnote = round(X0, digits = 0), notecex = 0.3, key = TRUE,
+        keysize = 1, trace = "none", density.info = c("none"), margins = c(10, 8),
+        na.color = "red", col = brewer.pal(11, "PuOr"),
+        main = cluster,
+        labRow = as.character(cluster_data$Gene), labCol = colnames_edit, colRow = cluster_data$color2, RowSideColors = cluster_data$color
+      )
+      dev.off()
+    }
+    if (cluster_length <= 59 && cluster_length >= 30) {
+      mypath <- file.path(output_path, paste0("cluster_", gsub(" ", "", cluster), ".pdf"))
+      pdf(file = mypath, height = 9, width = 12)
+      heatmap.2(
+        x = X0,
+        Rowv = TRUE, Colv = NA, distfun = dist, hclustfun = hclust,
+        dendrogram = "row", cexCol = 0.8, cexRow = 0.6, scale = "none",
+        breaks = colormapbreaks, symbreaks = FALSE, colsep = even_columns, sepcolor = "white", offsetCol = 0.1,
+        # zlim = c(-132,132),
+        xlab = "Type of Media", ylab = "Gene Name",
+        cellnote = round(X0, digits = 0), notecex = 0.4, key = TRUE,
+        keysize = 1, trace = "none", density.info = c("none"), margins = c(10, 8),
+        na.color = "red", col = brewer.pal(11, "PuOr"),
+        main = cluster,
+        labRow = as.character(cluster_data$Gene), labCol = colnames_edit, colRow = cluster_data$color2, RowSideColors = cluster_data$color
+      )
+      dev.off()
+    }
+    if (cluster_length <= 29) {
+      mypath <- file.path(output_path, paste0("cluster_", gsub(" ", "", cluster), ".pdf"))
+      pdf(file = mypath, height = 7, width = 12)
+      heatmap.2(
+        x = X0,
+        Rowv = TRUE, Colv = NA,
+        distfun = dist, hclustfun = hclust,
+        dendrogram = "row", cexCol = 0.8, cexRow = 0.9, scale = "none",
+        breaks = colormapbreaks, symbreaks = FALSE, colsep = even_columns, sepcolor = "white", offsetCol = 0.1,
+        # zlim = c(-132,132),
+        xlab = "Type of Media", ylab = "Gene Name",
+        cellnote = round(X0, digits = 0), notecex = 0.4, key = TRUE,
+        keysize = 1, trace = "none", density.info = c("none"), margins = c(10, 8),
+        na.color = "red", col = brewer.pal(11, "PuOr"),
+        main = cluster,
+        labRow = as.character(cluster_data$Gene), labCol = colnames_edit, colRow = cluster_data$color2, RowSideColors = cluster_data$color
+      )
+      dev.off()
+    }
+  }
+  # print(paste("FINISHED", "CLUSTER",cluster,sep = " "))
+}
--- a/qhtcp-workflow/apps/r/go_terms.tab
+++ b/qhtcp-workflow/apps/r/go_terms.tab
--- a/qhtcp-workflow/apps/r/gtaTemplate.R
+++ b/qhtcp-workflow/apps/r/gtaTemplate.R
@@ -0,0 +1,196 @@
+#!/usr/bin/env R
+# GTA (GoTermAveraging)
+# Your output  may not be reproducible as org.Sc.sgd.db is uploaded from Bioconductor R library and changes
+#
+# Updated 240724 Bryan C Roessler to improve file operations and portability
+# NOTE: The script now has 2 additional OPTIONAL arguments:
+#   1. Path to SGD terms file (go.terms.tab)
+#   2. Path to SGD features file (gene_association.sgd)
+
+library("stringr")
+library("org.Sc.sgd.db")
+library("plyr")
+
+# Parse arguments
+args <- commandArgs(TRUE)
+
+exp_name <- args[1]
+
+if (length(args) >= 2) {
+  zscores_file <- args[2]
+} else {
+  zscores_file <- "zscores/zscores_interaction.csv" # https://downloads.yeastgenome.org/curation/chromosomal_feature/gene_association.sgd
+}
+
+if (length(args) >= 3) {
+  sgd_terms_file <- args[3]
+} else {
+  sgd_terms_file <- "go_terms.tab"
+}
+
+if (length(args) >= 4) {
+  sgd_features_file <- args[4]
+} else {
+  sgd_features_file <- "gene_association.sgd" # https://downloads.yeastgenome.org/curation/chromosomal_feature/gene_association.sgd
+}
+
+if (length(args) >= 5) {
+  output_dir <- args[5]
+} else {
+  output_dir <- "../../out/gta" # https://downloads.yeastgenome.org/curation/chromosomal_feature/gene_association.sgd
+}
+
+# # Set SGDgeneList file path
+# if (length(args) > 4) {
+#   SGDgeneList <- args[4]
+# } else {
+#   SGDgeneList <- "../Code/SGD_features.tab"
+
+# Begin for loop for experiments in this study
+# ZScores_Interaction.csv
+for (m in 1:length(zscores_file)) {
+  
+  # zscores_file <- paste(Wstudy,"/",expName[m],'/ZScores/ZScores_Interaction.csv',sep="")  #ArgsScore[1]
+  X <- read.csv(file = zscores_file[m], stringsAsFactors = FALSE, header = TRUE)
+  
+  if (colnames(X)[1] == "OrfRep") {
+    colnames(X)[1] <- "ORF"
+  }
+  
+  #Terms is the GO term list
+  Terms <- read.delim(file = sgd_terms_file, header = FALSE, quote = "",
+    col.names = c("GO_ID", "GO_Term", "GO_Aspect", "GO_Term_Definition"))
+  #all ORFs associated with GO term
+  GO2ALLORFs <- as.list(org.Sc.sgdGO2ALLORFS)
+  #Gene_Association is the gene association to GO term file
+  Gene_Association <- read.delim(sgd_features_file, skip = 8, header = FALSE, quote = "",
+    col.names = c("Database", "Database_Object_ID", "Database_Object_Symbol", "NOT", "GO_ID",
+      "Database_Reference", "Evidence", "With_or_From", "Aspect", "Database_Object_Name",
+      "Database_Object_Synonym", "Database_Object_Type", "taxon", "Date", "Assigned_By", "OtherInfo", "Empty"
+    )
+  )
+  #Get the ORF names associated with each gene/GO term
+  Gene_Association$ORF <- str_split_fixed(as.character(Gene_Association$Database_Object_Synonym), "\\|", 2)[, 1]
+  #Get the numeric GO ID for matching
+  Gene_Association$GO_ID_Numeric <- as.integer(str_split_fixed(as.character(Gene_Association$GO_ID), "\\:", 2)[, 2])
+  #Get all unique GO terms
+  GO_Terms <- unique(Gene_Association$GO_ID)
+  #Create a character vector with just the ColNames of the input file to store the scores for each GO term
+  Col_Names_X <- colnames(X)
+  #Create a data_frame with header from input_file
+  GO_Term_Averages <- X[0, ]
+  #Fill table with NAs same length as number of GO terms
+  GO_Term_Averages[1:length(GO_Terms), ] <- NA
+  #Change the first and second col names to GO_ID and Term
+  colnames(GO_Term_Averages)[1] <- "GO_ID"
+  colnames(GO_Term_Averages)[2] <- "Term"
+  
+  # Create new columns
+  GO_Term_Averages$Ontology <- NA
+  GO_Term_Averages$NumGenes <- NA
+  GO_Term_Averages$AllPossibleGenes <- NA
+  GO_Term_Averages$Genes <- NA
+  GO_Term_Averages$ORFs <- NA
+  
+  # Create a data.frame for the standard deviation info
+  GO_Term_SD <- X[0, ]
+  GO_Term_SD[1:length(GO_Terms), ] <- NA
+  colnames(GO_Term_SD)[1] <- "GO_ID"
+  colnames(GO_Term_SD)[2] <- "Term"
+  
+  # Loop for each GO term to get an average L and K Z score
+  for (i in 1:length(GO_Terms)) {
+    # Get the GO_Term
+    ID <- GO_Terms[i]
+    # Get data.frame for all genes associated to the GO Term
+    ID_AllGenes <- Gene_Association[Gene_Association$GO_ID == ID, ]
+    # Get a vector of just the gene names
+    ID_AllGenes_vector <- as.vector(GO2ALLORFs[as.character(ID)][[1]])
+    if (length(unique(ID_AllGenes_vector)) > 4000) {
+      next()
+    }
+    # Get the GO term character description where numeric Terms ID matches GO_Term's ID
+    GO_Description_Term <- as.character(Terms[Terms$GO_ID %in% ID_AllGenes$GO_ID_Numeric, ]$GO_Term[1])
+    # Get the Z scores for all genes in the GO_ID
+    Zscores_For_ID <- X[X$ORF %in% ID_AllGenes_vector, ]
+    # Get the Gene names and ORFs for the term
+    GO_Term_Averages$Genes[i] <- paste(unique(Zscores_For_ID$Gene), collapse = " | ")
+    GO_Term_Averages$ORFs[i] <- paste(unique(Zscores_For_ID$ORF), collapse = " | ")
+    
+    # Dataframe to report the averages for a GO term
+    # Get the GO ID
+    GO_Term_Averages$GO_ID[i] <- as.character(ID)
+    # Get the term name
+    GO_Term_Averages$Term[i] <- GO_Description_Term
+    # Get total number of genes annotated to the Term that we have in our library
+    GO_Term_Averages$NumGenes[i] <- length(unique(Zscores_For_ID$ORF))
+    # Get total number of genes annotated to the Term in SGD
+    GO_Term_Averages$AllPossibleGenes[i] <- length(unique(ID_AllGenes_vector))
+    # Get the ontology of the term
+    GO_Term_Averages$Ontology[i] <- as.character(ID_AllGenes$Aspect[1])
+    
+    # Calculate the average score for every column
+    for (j in 3:length(X[1, ])) {
+      GO_Term_Averages[i, j] <- mean(Zscores_For_ID[, j], na.rm = TRUE)
+      # GO_Scores <- colMeans(Zscores_For_ID[,3:length(X[1,])])
+    }
+    
+    # Also calculate same values for the SD
+    GO_Term_SD$GO_ID[i] <- as.character(ID)
+    # Get the term name
+    GO_Term_SD$Term[i] <- GO_Description_Term
+    
+    # Calculate column scores for SD
+    for (j in 3:length(X[1, ])) {
+      GO_Term_SD[i, j] <- sd(Zscores_For_ID[, j], na.rm = TRUE)
+      # GO_Scores <- colMeans(Zscores_For_ID[,3:length(X[1,])])
+    }
+  }
+  # Add either _Avg or _SD depending on if the calculated score is an average or SD
+  colnames(GO_Term_Averages) <- paste(colnames(GO_Term_Averages), "Avg", sep = "_")
+  colnames(GO_Term_SD) <- paste(colnames(GO_Term_SD), "SD", sep = "_")
+  # Combine the averages with the SDs to make one big data.frame
+  X2 <- cbind(GO_Term_Averages, GO_Term_SD)
+  # Test[ , order(names(test))]
+  X2 <- X2[, order(names(X2))]
+  X2 <- X2[!is.na(X2$Z_lm_L_Avg), ]
+  # Create output file
+  write.csv(X2, file.path(output_dir, expName[m], "Average_GOTerms_All.csv"), row.names = FALSE)
+  # Remove NAs
+  X3 <- X2[!is.na(X2$Z_lm_L_Avg), ]
+  
+  # Identify redundant GO terms
+  for (i in 1:length(X3[, 1])) {
+    # Loop through each GO term - get term
+    GO_term_ID <- as.character(X3$GO_ID_Avg[i])
+    # Get term in the X3
+    X3_Temp <- X3[X3$GO_ID_Avg == GO_term_ID, ]
+    # Get anywhere that has the same number K_Avg value
+    X3_Temp2 <- X3[X3$Z_lm_K_Avg %in% X3_Temp, ]
+    if (length(X3_Temp2[, 1]) > 1) {
+      if (length(unique(X3_Temp2$Genes_Avg)) == 1) {
+        X3_Temp2 <- X3_Temp2[1, ]
+      }
+    }
+    
+    if (i == 1) {
+      Y <- X3_Temp2
+    }
+    if (i > 1) {
+      Y <- rbind(Y, X3_Temp2)
+    }
+  }
+
+  Y1 <- unique(Y)
+  write.csv(Y1, file.path(output_dir, exp_name, "Average_GOTerms_All_NonRedundantTerms.csv"), row.names = FALSE)
+  Y2 <- Y1[Y1$Z_lm_L_Avg >= 2 | Y1$Z_lm_L_Avg <= -2, ]
+  Y2 <- Y2[!is.na(Y2$Z_lm_L_Avg), ]
+  write.csv(Y2, file.path(output_dir, exp_name, "Average_GOTerms_NonRedundantTerms_Above2SD_L.csv"), row.names = FALSE)
+  Y3 <- Y2[Y2$NumGenes_Avg > 2, ]
+  write.csv(Y3, file.path(output_dir, exp_name, "Average_GOTerms_NonRedundantTerms_Above2SD_L_Above2Genes.csv"), row.names = FALSE)
+  Y4 <- Y1[Y1$Z_lm_K_Avg >= 2 | Y1$Z_lm_K_Avg <= -2, ]
+  Y4 <- Y4[!is.na(Y4$Z_lm_K_Avg), ]
+  write.csv(Y4, file.path(output_dir, exp_name, "Average_GOTerms_NonRedundantTerms_Above2SD_K.csv"), row.names = FALSE)
+  Y5 <- Y4[Y4$NumGenes_Avg > 2, ]
+  write.csv(Y5, file.path(output_dir, exp_name, "Average_GOTerms_NonRedundantTerms_Above2SD_K_Above2Genes.csv"), row.names = FALSE)
+}
--- a/qhtcp-workflow/apps/r/joinInteractExps.R
+++ b/qhtcp-workflow/apps/r/joinInteractExps.R
@@ -0,0 +1,375 @@
+library("dplyr")
+
+# Function to parse and set arguments
+parse_arguments <- function() {
+  if (interactive()) {
+    args <- c(
+      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD",
+      2,
+      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/StudyInfo.csv",
+      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/Exp1",
+      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/Exp2"
+    )
+  } else {
+    args <- commandArgs(trailingOnly = TRUE)
+  }
+  
+  list(
+    out_dir = normalizePath(file.path(args[1]), mustWork = FALSE),
+    sd = as.numeric(args[2]),
+    study_info = normalizePath(file.path(args[3]), mustWork = FALSE),
+    input_dirs = args[4:length(args)]
+  )
+}
+
+args <- parse_arguments()
+
+# Create an array for the zscores files
+get_zscores_files <- function(dirs) {
+  files <- sapply(dirs, function(exp) {
+    file_path <- file.path(exp, "zscores", "zscores_interaction.csv")
+    if (file.exists(file_path)) file_path else NULL
+  }, simplify = TRUE, USE.NAMES = FALSE)
+  
+  # Filter out NULL entries
+  files[!sapply(files, is.null)]
+}
+
+zscores_files <- get_zscores_files(args$input_dirs)
+sprintf("The SD value is: %d", args$sd)
+
+# Ensure there are enough files to compare
+if (length(zscores_files) < 2) stop("Not enough experiments to compare, exiting script")
+
+# Function to join zscores files
+join_zscores_files <- function(files) {
+  joined_data <- read.csv(file = files[1], stringsAsFactors = FALSE)
+  for (file in files[-1]) {
+    temp_data <- read.csv(file = file, stringsAsFactors = FALSE)
+    joined_data <- join(joined_data, temp_data, by = "OrfRep")
+  }
+  joined_data
+}
+
+# Load and join zscores files
+joined_data <- join_zscores_files(zscores_files)
+
+# Order and select columns
+order_and_select_columns <- function(data) {
+  ordered_data <- data[, order(colnames(data))]
+  selected_headers <- select(ordered_data,
+    contains("OrfRep"), matches("Gene"),
+    contains("z_lm_k"), contains("z_shift_k"),
+    contains("z_lm_l"), contains("z_shift_l"))
+  selected_headers
+}
+
+selected_headers <- order_and_select_columns(joined_data)
+
+# Remove redundant columns like "Gene.1"
+clean_headers <- function(data, suffixes) {
+  suffixes_to_remove <- paste0("Gene.", seq_len(suffixes))
+  select(data, -all_of(suffixes_to_remove))
+}
+
+headSel <- clean_headers(selected_headers, length(zscores_files) - 1)
+headSel2 <- clean_headers(select(joined_data, contains("OrfRep"), matches("Gene")), length(zscores_files) - 1)
+
+# Fill NA values in Shift and Z_lm columns
+fill_na_in_columns <- function(data) {
+  for (header in colnames(data)) {
+    if (grepl("Shift", header)) {
+      data[[header]][is.na(data[[header]])] <- 0.001
+    } else if (grepl("Z_lm_", header)) {
+      data[[header]][is.na(data[[header]])] <- 0.0001
+    }
+  }
+  data
+}
+
+headSel <- fill_na_in_columns(headSel)
+
+# Filter based on standard deviation
+filter_by_sd <- function(data, sd) {
+  if (sd == 0) return(data)
+  
+  z_lm_cols <- select(data, contains("z_lm_"))
+  filter_vector <- rowSums(abs(z_lm_cols) >= sd) > 0
+  data[filter_vector, ]
+}
+
+REMcRdy <- filter_by_sd(select(headSel, contains("OrfRep"), matches("Gene"), contains("z_lm_")), args$sd)
+shiftOnly <- filter_by_sd(select(headSel, contains("OrfRep"), matches("Gene"), contains("z_shift")), args$sd)
+
+# Reorder columns to interleave Z_lm and Shift data
+reorder_columns <- function(data1, data2) {
+  combined_data <- data1
+  for (i in 3:ncol(data1)) {
+    combined_data <- cbind(combined_data, data2[i], data1[i])
+  }
+  combined_data
+}
+
+combI <- reorder_columns(headSel2, shiftOnly)
+
+# Write output files
+write.csv(REMcRdy, file.path(args$out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
+write.csv(shiftOnly, file.path(args$out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
+
+# Relabel headers using experiment names from StudyInfo.csv
+relabel_headers <- function(headers, labels) {
+  new_labels <- headers
+  for (i in seq_along(headers)) {
+    suffix <- sub("^.*\\.(\\d+)$", "\\1", headers[i])
+    if (suffix %in% 1:3) {
+      exp_name <- labels[as.numeric(suffix), 2]
+      new_labels[i] <- gsub(paste0(".", suffix), paste0("_", exp_name), headers[i])
+    }
+  }
+  new_labels
+}
+
+LabelStd <- read.csv(file = args$study_info, stringsAsFactors = FALSE)
+colnames(shiftOnly) <- relabel_headers(colnames(shiftOnly), LabelStd)
+colnames(REMcRdy) <- relabel_headers(colnames(REMcRdy), LabelStd)
+
+# Save relabeled files
+write.csv(REMcRdy, file.path(args$out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
+write.csv(shiftOnly, file.path(args$out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
+
+# Save updated parameters
+LabelStd[, 4] <- args$sd
+write.csv(LabelStd, file.path(args$out_dir, "parameters.csv"), row.names = FALSE)
+write.csv(LabelStd, file = args$study_info, row.names = FALSE)
+
+
+
+
+
+
+
+# library("plyr")
+# library("sos")
+# library("dplyr")
+
+# # Function to parse and set arguments
+# parse_arguments <- function() {
+#   if (interactive()) {
+#     args <- c(
+#       "./",                          # Default out_dir
+#       "2",                           # Default SD value
+#       "../Code/StudyInfo.csv"        # Default study_info path
+#     )
+#   } else {
+#     args <- commandArgs(trailingOnly = TRUE)
+#   }
+  
+#   list(
+#     out_dir = normalizePath(file.path(args[1]), mustWork = FALSE),
+#     sd = as.numeric(args[2]),
+#     study_info = normalizePath(file.path(args[3]), mustWork = FALSE),
+#     input_dirs = args[4:length(args)]
+#   )
+# }
+
+# args <- parse_arguments()
+
+# # Create an array for the zscores files
+# zscores_files <- sapply(args$input_dirs, function(study) {
+#   file_path <- file.path(study, "zscores", "zscores_interaction.csv")
+#     if (file.exists(file_path)) file_path else NULL
+#     }, simplify = TRUE, USE.NAMES = FALSE)
+
+# # Filter out NULL entries
+# zscores_files <- zscores_files[!sapply(zscores_files, is.null)]
+
+# sprintf("The SD value is: %d", args$sd)
+# # print(args$input_dirs)
+
+# # TODO this is better handled in a loop in case you want to compare more experiments?
+# # The input is already  designed for this
+# # Read in the files for your experiment and
+# # Join the two files at a time as a function of how many inputFile
+# # list the larger file first ? in this example X2 has the larger number of genes
+# # If X1 has a larger number of genes, switch the order of X1 and X2
+# if (length(zscores_files) < 2) {
+#   print("Note enough Exps to compare, skipping join")
+#   stop("Exiting script")
+# }
+
+# if (length(zscores_files) >= 2) {
+#   X1 <- read.csv(file = zscores_files[1], stringsAsFactors = FALSE)
+#   X2 <- read.csv(file = zscores_files[2], stringsAsFactors = FALSE)
+#   X <- join(X1, X2, by = "OrfRep")
+#   OBH <- X[, order(colnames(X))]  # OrderByHeader
+#   headers <- select(OBH, contains("OrfRep"), matches("Gene"),
+#     contains("z_lm_k"), contains("z_shift_k"), contains("z_lm_l"), contains("z_shift_l"))
+#   headSel <- select(headers, -"Gene.1") # remove "Gene.1 column
+#   headSel2 <- select(OBH,  contains("OrfRep"), matches("Gene")) # frame for interleaving Z_lm with Shift colums
+#   headSel2 <- select(headSel2, -"Gene.1") # remove "Gene.1 column  # frame for interleaving Z_lm with Shift colums
+# }
+
+# if (length(zscores_files) >= 3) {
+#   X3 <- read.csv(file = zscores_files[3], stringsAsFactors = FALSE)
+#   X <- join(X, X3, by = "OrfRep")
+#   headSel <- select(headers, -"Gene.1", -"Gene.2")
+#   headSel2 <- select(OBH,  contains("OrfRep"), matches("Gene"))
+#   headSel2 <- select(headSel2, -"Gene.1", -"Gene.2")
+# }
+
+# if (length(zscores_files) >= 4) {
+#   X4 <- read.csv(file = zscores_files[4], stringsAsFactors = FALSE)
+#   X <- join(X, X4, by = "OrfRep")
+#   headSel <- select(headers, -"Gene.1", -"Gene.2", -"Gene.3")
+#   headSel2 <- select(OBH,  contains("OrfRep"), matches("Gene"))
+#   headSel2 <- select(headSel2, -"Gene.1", -"Gene.2", -"Gene.3")
+# }
+
+# # print(headers)
+# # headSel$contains("Z_Shift") %>% replace_na(0.001)
+# headers <- colnames(headSel)
+# # print(headers)
+# i <- 0
+# for (i in 1:length(headers)) {
+#   if (grepl("Shift", headers[i])) {
+#     headSel[headers[i]][is.na(headSel[headers[i]])] <- 0.001
+#   }
+#   if (grepl("Z_lm_", headers[i])) {
+#     headSel[headers[i]][is.na(headSel[headers[i]])] <- 0.0001
+#   }
+# }
+
+# # 2SD option code to exclude Z_lm values less than 2 standard Deviations
+# REMcRdy <- select(headSel,  contains("OrfRep"), matches("Gene"), contains("z_lm_"))
+# shiftOnly <- select(headSel,  contains("OrfRep"), matches("Gene"), contains("z_shift"))
+
+# # Code to replace the numeric (.1 .2 .3) headers with experiment names from StudyInfo.txt
+# Labels <- read.csv(file = study_info, stringsAsFactors = FALSE, sep = ",")
+
+# # Using Text search grepl to relabel headers
+# REMcRdyHdr <- colnames(REMcRdy)
+# REMcRdyLabels <- "asdf"
+# shftHdr <- colnames(shiftOnly)
+# shiftLabels <- "asdf"
+# shiftLabels[1:2] <- shftHdr[1:2]
+# REMcRdyLabels[1:2] <- REMcRdyHdr[1:2]
+  
+# for (i in 3:(length(shftHdr))) {
+#   if (i == 3) {
+#     shiftLabels[3] <- paste0(Labels[1, 2], ".", shftHdr[3])
+#     REMcRdyLabels[3] <- paste0(Labels[1, 2], ".", REMcRdyHdr[3])
+#   }
+#   if (i == 5) {
+#     shiftLabels[5] <- paste0(Labels[1, 2], ".", shftHdr[5])
+#     REMcRdyLabels[5] <- paste0(Labels[1, 2], ".", REMcRdyHdr[5])
+#   }
+#   if (i == 7) {
+#     shiftLabels[7] <- paste0(Labels[1, 2], ".", shftHdr[7])
+#     REMcRdyLabels[7] <- paste0(Labels[1, 2], ".", REMcRdyHdr[7])
+#   }
+#   if (grepl(".1", shftHdr[i], fixed = true)) {
+#     shiftLabels[i] <- paste0(Labels[2, 2], ".", shftHdr[i])
+#     REMcRdyLabels[i] <- paste0(Labels[2, 2], ".", REMcRdyHdr[i])
+#   }
+#   if (grepl(".2", shftHdr[i], fixed = true)) {
+#     shiftLabels[i] < -paste0(Labels[3, 2], ".", shftHdr[i])
+#     REMcRdyLabels[i] <- paste0(Labels[3, 2], ".", REMcRdyHdr[i])
+#   }
+#   if (grepl(".3", shftHdr[i], fixed = true)) {
+#     shiftLabels[i] <- paste0(Labels[4, 2], ".", shftHdr[i])
+#     REMcRdyLabels[i] <- paste0(Labels[4, 2], ".", REMcRdyHdr[i])
+#   }
+# }
+
+# for (i in 3:(length(REMcRdyLabels))) {
+#   j <- as.integer(i)
+#   REMcRdyLabels[j] <- gsub("[.]", "_", REMcRdyLabels[j])
+#   shiftLabels[j] <- gsub("[.]", "_", shiftLabels[j])
+# }
+
+# colnames(shiftOnly) <- shiftLabels
+# colnames(REMcRdy) <- REMcRdyLabels
+
+# combI <- headSel2 # starting Template orf, Genename columns
+
+# # headersRemc<-colnames(REMcRdy)
+# # Reorder columns to produce an interleaved set of Z_lm and Shift data for all the cpps.
+# for (i in 3:length(colnames(REMcRdy))) {
+#   combI <- cbind.data.frame(combI, shiftOnly[i])
+#   combI <- cbind.data.frame(combI, REMcRdy[i])
+# }
+
+# Vec1 <- NA
+# Vec2 <- NA
+# Vec3 <- NA
+# Vec4 <- NA
+# Vec5 <- NA
+# Vec6 <- NA
+# Vec7 <- NA
+# Vec8 <- NA
+
+# if (length(REMcRdy) == 6) {
+#   Vec1 <- abs(REMcRdy[, 3]) >= sd
+#   Vec2 <- abs(REMcRdy[, 4]) >= sd
+#   Vec3 <- abs(REMcRdy[, 5]) >= sd
+#   Vec4 <- abs(REMcRdy[, 6]) >= sd
+#   bolVec <- Vec1 | Vec2 | Vec3 | Vec4
+#   REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
+#   REMcRdyGT2[, 3:6] <- REMcRdy[bolVec, 3:6]
+#   shiftOnlyGT2 <- shiftOnly[bolVec, 1:2]
+#   shiftOnlyGT2[, 3:6] <- shiftOnly[bolVec, 3:6]
+# }
+
+# if (length(REMcRdy) == 8) {
+#   Vec1 <- abs(REMcRdy[, 3]) >= sd
+#   Vec2 <- abs(REMcRdy[, 4]) >= sd
+#   Vec3 <- abs(REMcRdy[, 5]) >= sd
+#   Vec4 <- abs(REMcRdy[, 6]) >= sd
+#   Vec5 <- abs(REMcRdy[, 7]) >= sd
+#   Vec6 <- abs(REMcRdy[, 8]) >= sd
+#   bolVec <- Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6
+#   REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
+#   REMcRdyGT2[, 3:8] <- REMcRdy[bolVec, 3:8]
+#   shiftOnlyGT2 <- shiftOnly[bolVec, 1:2]
+#   shiftOnlyGT2[, 3:8] <- shiftOnly[bolVec, 3:8]
+# }
+
+# if (length(REMcRdy) == 10) {
+#   Vec1 <- abs(REMcRdy[, 3]) >= sd
+#   Vec2 <- abs(REMcRdy[, 4]) >= sd
+#   Vec3 <- abs(REMcRdy[, 5]) >= sd
+#   Vec4 <- abs(REMcRdy[, 6]) >= sd
+#   Vec5 <- abs(REMcRdy[, 7]) >= sd
+#   Vec6 <- abs(REMcRdy[, 8]) >= sd
+#   Vec7 <- abs(REMcRdy[, 9]) >= sd
+#   Vec8 <- abs(REMcRdy[, 10]) >= sd
+#   bolVec <- Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6 | Vec7 | Vec8
+#   REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
+#   REMcRdyGT2[, 3:10] <- REMcRdy[bolVec, 3:10]
+#   shiftOnlyGT2 <- shiftOnly[bolVec, 1:2]
+#   shiftOnlyGT2[, 3:10] <- shiftOnly[bolVec, 3:10]
+# }
+
+# if (sd != 0) {
+#   REMcRdy <- REMcRdyGT2  # [,2:length(REMcRdyGT2)]
+#   shiftOnly <- shiftOnlyGT2  # [,2:length(shiftOnlyGT2)]
+# }
+
+# if (sd == 0) {
+#   REMcRdy <- REMcRdy # [,2:length(REMcRdy)]
+#   shiftOnly <- shiftOnly # [,2:length(shiftOnly)]
+# }
+
+# # R places hidden "" around the header names. The following
+# # is intended to remove those quote so that the "" do not blow up the Java REMc.
+# # Use ,quote=F in the write.csv statement to fix R output file.
+# # write.csv(combI,file.path(out_dir,"CombinedKLzscores.csv"), row.names = FALSE)
+# write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
+# write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
+# # LabelStd <- read.table(file="./parameters.csv",stringsAsFactors = FALSE,sep = ",")
+
+# LabelStd <- read.csv(file = study_info, stringsAsFactors = FALSE)
+# # print(sd)
+# LabelStd[, 4] <- as.numeric(sd)
+# write.csv(LabelStd, file = file.path(out_dir, "parameters.csv"), row.names = FALSE)
+# write.csv(LabelStd, file = study_info, row.names = FALSE)
--- a/qhtcp-workflow/apps/r/join_interaction_zscores.R
+++ b/qhtcp-workflow/apps/r/join_interaction_zscores.R
@@ -0,0 +1,113 @@
+suppressMessages({
+  library("dplyr")
+  library("data.table")
+  library("readr")
+  library("stringr")
+})
+
+# Function to parse arguments
+parse_arguments <- function() {
+  if (interactive()) {
+    args <- c(
+      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD",
+      3, # sd value
+      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp1",
+      "Experiment 1: Doxo versus HLD",
+      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp2",
+      "Experiment 2: HLD versus Doxo"
+    )
+  } else {
+    args <- commandArgs(trailingOnly = TRUE)
+  }
+
+  out_dir <- normalizePath(args[1], mustWork = FALSE)
+  sd <- as.numeric(args[2])
+  paths <- normalizePath(args[seq(3, length(args), by = 2)], mustWork = FALSE)
+  names <- args[seq(4, length(args), by = 2)]
+  experiments <- setNames(paths, names)
+
+  list(
+    out_dir = out_dir,
+    sd = sd,
+    experiments = experiments
+  )
+}
+
+args <- parse_arguments()
+
+# Ensure main output directory exists
+dir.create(args$out_dir, showWarnings = FALSE, recursive = TRUE)
+
+# Function to read and combine z-score interaction files
+combine_zscores <- function(experiments, out_dir) {
+  combined_data <- lapply(names(experiments), function(exp_name) {
+    exp_dir <- experiments[[exp_name]]
+    zscore_file <- file.path(exp_dir, "zscores", "zscores_interaction.csv")
+
+    if (!file.exists(zscore_file)) {
+      stop("Z-score file does not exist for ", exp_name, " at ", zscore_file)
+    }
+
+    message("Reading z-score file for ", exp_name, " from ", zscore_file)
+    data <- fread(zscore_file)
+    data$Experiment <- exp_name
+
+    return(data)
+  }) %>%
+    bind_rows()
+
+  combined_output_file <- file.path(out_dir, "combined_zscores.csv")
+  fwrite(combined_data, combined_output_file, row.names = FALSE)
+
+  message("Combined z-score file saved to: ", combined_output_file)
+}
+
+# Function to read and combine summary statistics files
+combine_summary_stats <- function(experiments, out_dir) {
+  combined_stats <- lapply(names(experiments), function(exp_name) {
+    exp_dir <- experiments[[exp_name]]
+    summary_file <- file.path(exp_dir, "zscores", "summary_stats_all_strains.csv")
+
+    if (!file.exists(summary_file)) {
+      stop("Summary stats file does not exist for ", exp_name, " at ", summary_file)
+    }
+
+    message("Reading summary stats file for ", exp_name, " from ", summary_file)
+    data <- fread(summary_file)
+    data$Experiment <- exp_name
+
+    return(data)
+  }) %>%
+    bind_rows()
+
+  combined_output_file <- file.path(out_dir, "combined_summary_stats.csv")
+  fwrite(combined_stats, combined_output_file, row.names = FALSE)
+
+  message("Combined summary stats file saved to: ", combined_output_file)
+}
+
+# Function to generate final summary report
+generate_final_report <- function(out_dir) {
+  combined_zscores <- file.path(out_dir, "combined_zscores.csv")
+  combined_stats <- file.path(out_dir, "combined_summary_stats.csv")
+
+  if (!file.exists(combined_zscores) || !file.exists(combined_stats)) {
+    stop("Combined z-scores or summary stats files do not exist.")
+  }
+
+  zscores_data <- fread(combined_zscores)
+  stats_data <- fread(combined_stats)
+
+  message("Merging z-score and summary stats data...")
+  final_report <- merge(zscores_data, stats_data, by = c("OrfRep", "Experiment"), all = TRUE, allow.cartesian = TRUE)
+
+  final_report_file <- file.path(out_dir, "final_combined_report.csv")
+  fwrite(final_report, final_report_file, row.names = FALSE)
+
+  message("Final combined report saved to: ", final_report_file)
+}
+
+# Process all experiments and generate outputs
+combine_zscores(args$experiments, args$out_dir)
+combine_summary_stats(args$experiments, args$out_dir)
+generate_final_report(args$out_dir)