Add new configuration file

2024-08-22 04:20:40 -04:00
parent a300484ceb
commit bcc8dba3e4
8 changed files with 1824 additions and 1016 deletions
--- a/workflow/.old/apps/shell/submodules/parse_study_info
+++ b/workflow/.old/apps/shell/submodules/parse_study_info
@@ -0,0 +1,121 @@
 #!/usr/bin/env/bash
 # @description Creates, modifies, and parses the study info file
 #
 # TODO
 #
 # * Needs refactoring
 # * Ended up combining a few functions into one
 #
 #
 # @exitcode 0 If one or more studies found
 # @exitcode 1 If no studies found
 # @set STUDIES array contains array of "exp# sd ExpDir"
 parse_study_info() {
  debug "Running: ${FUNCNAME[0]}"
  # Only run this once per project
  # in case we run multiple modules
  (( SET_STUDIES )) && return 0
  declare -g SET_STUDIES=1
  # Use initials from project or whoami?
  # Best I can do is first two letters of username
  # See TODO in markdown
  initials="${USER:0:2}"
  INITIALS=${initials^^}
  empty_study=1
  # Find an Exp directory that does not exist
  while [[ -d $STUDY_RESULTS_DIR/exp$empty_study ]]; do
    (( empty_study++ ))
  done
  next_study_entry="$empty_study,$PROJECT_NAME,NA,NA,$INITIALS"
  echo "${underline}Study Info File${nounderline}"
  if [[ -f $STUDY_INFO_FILE ]]; then
    # Get latest entry
    while IFS=',' read -r col1 _; do # split on comma, get Exp # from 1st column
      studies_nums+=("$col1")
    done < <(tail -n +2 "$STUDY_INFO_FILE")
    largest=${studies_nums[0]}
    for i in "${studies_nums[@]}"; do
      if ((i > largest)); then
        largest=$i
      fi
    done
    empty_study=$((largest+1))
    next_study_entry="$((empty_study)),$PROJECT_NAME,NA,NA,$INITIALS"
  else # create a default study info file
    echo "ExpNumb,ExpLabel,BackgroundSD,ZscoreJoinSD,AnalysisBy" > "$STUDY_INFO_FILE"
    echo "$next_study_entry" >> "$STUDY_INFO_FILE"
    next_study_entry="$((empty_study+1)),$PROJECT_NAME,NA,NA,$INITIALS"
  fi
  # Print current studies
  cat <<-EOF 
 		* Give each experiment labels to be used for the plots and specific files. 
 		* Enter the desired experiment names in the order they should appear in the REMc heatmaps
 		Current study info file contents:
 		${underline}$STUDY_INFO_FILE${nounderline}
 		$(cat "$STUDY_INFO_FILE")
 	EOF
  # Allow user to add/edit the study info file
  if ! ((YES)); then
    for ((i=1; i<2; i++)); do
      cat <<-EOF
 				Next entry suggestion: "$next_study_entry"
 				Would you like to: 
 				* (a)dd the suggested entry
 				* (e)dit the study info file manually
 				* (c)ontinue (default)
 			EOF
      read -r -p "(c): " response
      echo ""
      [[ -z $response ]] && break
      case $response in
        a)
          echo "Adding auto-entry suggestion to $STUDY_INFO_FILE"
          echo "$next_study_entry" >> "$STUDY_INFO_FILE"
          next_study_entry="$((empty_study+1)),$PROJECT_NAME,NA,NA,$INITIALS"
          i=0
          ;;
        e)
          debug "$EDITOR $STUDY_INFO_FILE"
          "$EDITOR" "$STUDY_INFO_FILE"
          ;;
        c)
          break
          ;;
        *)
          err "Invalid response, please try again"
          i=0
          ;;
      esac
      break
    done
  fi
  # Read study info file
  declare -ga STUDIES
  while IFS=',' read -r num _ sd _; do
    STUDIES+=("$num $sd $STUDY_RESULTS_DIR/exp$num")
  done < <(tail -n +2 "$STUDY_INFO_FILE") # skip header
  # Initialize missing Exp dirs
  for study in "${STUDIES[@]}"; do
    read -r _ _ dir <<< "$study"
    [[ -d $dir ]] || execute mkdir "$dir"
  done
  ((DEBUG)) && declare -p STUDIES
  # Return true if at least one study was found
  [[ ${#STUDIES[@]} -gt 0 ]]
 }
--- a/workflow/apps/python/join_interactions.py
+++ b/workflow/apps/python/join_interactions.py
@@ -0,0 +1,125 @@
 import pandas as pd
 import os
 import sys
 import numpy as np
 # Function to parse and set arguments
 def parse_arguments():
    if len(sys.argv) == 1:  # Interactive mode
        args = [
            "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD",
            2,
            "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/StudyInfo.csv",
            "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/Exp1",
            "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/Exp2"
        ]
    else:
        args = sys.argv[1:]
    return {
        "out_dir": os.path.abspath(args[0]),
        "sd": float(args[1]),
        "study_info": os.path.abspath(args[2]),
        "input_dirs": args[3:]
    }
 args = parse_arguments()
 # Create an array for the zscores files
 def get_zscores_files(dirs):
    files = [os.path.join(study, "zscores", "zscores_interaction.csv") 
             for study in dirs if os.path.exists(os.path.join(study, "zscores", "zscores_interaction.csv"))]
    return files
 zscores_files = get_zscores_files(args['input_dirs'])
 print(f"The SD value is: {args['sd']}")
 # Ensure there are enough files to compare
 if len(zscores_files) < 2:
    sys.exit("Not enough experiments to compare, exiting script")
 # Function to join zscores files
 def join_zscores_files(files):
    joined_data = pd.read_csv(files[0])
    for file in files[1:]:
        temp_data = pd.read_csv(file)
        joined_data = pd.merge(joined_data, temp_data, on="OrfRep", how="outer")
    return joined_data
 # Load and join zscores files
 joined_data = join_zscores_files(zscores_files)
 # Order and select columns
 def order_and_select_columns(data):
    ordered_data = data[sorted(data.columns)]
    selected_headers = ordered_data.filter(regex="OrfRep|Gene|z_lm_k|z_shift_k|z_lm_l|z_shift_l")
    return selected_headers
 selected_headers = order_and_select_columns(joined_data)
 # Remove redundant columns like "Gene.1"
 def clean_headers(data, suffixes):
    suffixes_to_remove = [f"Gene.{i}" for i in range(1, suffixes+1)]
    return data.drop(columns=suffixes_to_remove, errors='ignore')
 headSel = clean_headers(selected_headers, len(zscores_files) - 1)
 headSel2 = clean_headers(joined_data.filter(regex="OrfRep|Gene"), len(zscores_files) - 1)
 # Fill NA values in Shift and Z_lm columns
 def fill_na_in_columns(data):
    for column in data.columns:
        if "Shift" in column:
            data[column].fillna(0.001, inplace=True)
        elif "Z_lm_" in column:
            data[column].fillna(0.0001, inplace=True)
    return data
 headSel = fill_na_in_columns(headSel)
 # Filter based on standard deviation
 def filter_by_sd(data, sd):
    if sd == 0:
        return data
    z_lm_cols = data.filter(regex="z_lm_")
    filter_vector = z_lm_cols.abs().ge(sd).any(axis=1)
    return data[filter_vector]
 REMcRdy = filter_by_sd(headSel.filter(regex="OrfRep|Gene|z_lm_"), args['sd'])
 shiftOnly = filter_by_sd(headSel.filter(regex="OrfRep|Gene|z_shift"), args['sd'])
 # Reorder columns to interleave Z_lm and Shift data
 def reorder_columns(data1, data2):
    combined_data = data1.copy()
    for i in range(2, data1.shape[1]):
        combined_data.insert(2 * i - 1, data2.columns[i], data2.iloc[:, i])
    return combined_data
 combI = reorder_columns(headSel2, shiftOnly)
 # Write output files
 REMcRdy.to_csv(os.path.join(args['out_dir'], "REMcRdy_lm_only.csv"), index=False, quotechar=False)
 shiftOnly.to_csv(os.path.join(args['out_dir'], "Shift_only.csv"), index=False, quotechar=False)
 # Relabel headers using experiment names from StudyInfo.csv
 def relabel_headers(headers, labels):
    new_labels = headers.copy()
    for i, header in enumerate(headers):
        suffix = header.split('.')[-1]
        if suffix.isdigit() and int(suffix) in range(1, 4):
            exp_name = labels.iloc[int(suffix) - 1, 1]
            new_labels[i] = header.replace(f".{suffix}", f"_{exp_name}")
    return new_labels
 LabelStd = pd.read_csv(args['study_info'])
 shiftOnly.columns = relabel_headers(shiftOnly.columns, LabelStd)
 REMcRdy.columns = relabel_headers(REMcRdy.columns, LabelStd)
 # Save relabeled files
 REMcRdy.to_csv(os.path.join(args['out_dir'], "REMcRdy_lm_only.csv"), index=False, quotechar=False)
 shiftOnly.to_csv(os.path.join(args['out_dir'], "Shift_only.csv"), index=False, quotechar=False)
 # Save updated parameters
 LabelStd.iloc[:, 3] = args['sd']
 LabelStd.to_csv(os.path.join(args['out_dir'], "parameters.csv"), index=False)
 LabelStd.to_csv(args['study_info'], index=False)
--- a/workflow/apps/r/PairwiseLK.R
+++ b/workflow/apps/r/PairwiseLK.R
@@ -1,12 +1,4 @@
 #!/usr/bin/env Rscript
 # This R script performs GTA L and K Pairwise Compares for user specified pairs of Experiments
 #
 # Updated 240724 Bryan C Roessler to improve file operations and portability
 # NOTE: The two required arguments are the same and now there are two optional arguments
 #   1. Exp1
 #   2. Exp2
 #   3. StudyInfo.csv file
 #   4. Output Directory
 library("ggplot2")
 library("plotly")
@@ -16,31 +8,14 @@ library("grid")
 library("ggthemes")
 args <- commandArgs(TRUE)
-exp_name <- args[1]
+exp1_name <- args[1]
-exp_name2 <- args[2]
+exp1_file <- args[2]
 exp2_name <- args[3]
 exp2_file <- args[4]
 output_dir <- args[5]
-if (length(args) >= 3) {
+pairDirL <- file.path(output_dir, paste0("PairwiseCompareL_", exp1_name, "-", exp2_name))
-  study_info_file <- args[3]
+pairDirK <- file.path(output_dir, paste0("PairwiseCompareK_", exp1_name, "-", exp2_name))
 } else {
  study_info_file <- "StudyInfo.csv"
 }
 if (length(args) >= 4) {
  output_dir <- args[4]
 } else {
  output_dir <- "gta"
 }
 expNumber1 <- as.numeric(sub("^.*?(\\d+)$", "\\1", exp_name))
 expNumber2 <- as.numeric(sub("^.*?(\\d+)$", "\\1", exp_name2))
 Labels <- read.csv(file = study_info_file, stringsAsFactors = FALSE)
 Name1 <- Labels[expNumber1, 2]
 Name2 <- Labels[expNumber2, 2]
 go_terms_file <- "Average_GOTerms_All.csv"
 input_file1 <- file.path(output_dir, exp_name, go_terms_file)
 input_file2 <- file.path(output_dir, exp_name2, go_terms_file)
 pairDirL <- file.path(output_dir, paste0("PairwiseCompareL_", exp_name, "-", exp_name2))
 pairDirK <- file.path(output_dir, paste0("PairwiseCompareK_", exp_name, "-", exp_name2))
 # Pairwise L
 # outputPlotly <- "../GTAresults/PairwiseCompareL/" #"/GTAresults/PairwiseCompareL/"
@@ -75,21 +50,6 @@ theme_Publication <- function(base_size = 14, base_family = "sans") {
  )
 }
 scale_fill_Publication <- function(...) {
  library(scales)
  discrete_scale("fill", "Publication", manual_pal(
    values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
    "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...
  )
 }
 scale_colour_Publication <- function(...) {
  discrete_scale("colour", "Publication", manual_pal(
    values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
    "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...
  )
 }
 theme_Publication_legend_right <- function(base_size = 14, base_family = "sans") {
  (theme_foundation(base_size = base_size, base_family = base_family) +
    theme(
@@ -131,8 +91,8 @@ scale_colour_Publication <- function(...) {
    "#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...)
 }
-X1 <- read.csv(file = input_file1, stringsAsFactors = FALSE, header = TRUE)
+X1 <- read.csv(file = exp1_file, stringsAsFactors = FALSE, header = TRUE)
-X2 <- read.csv(file = input_file2, stringsAsFactors = FALSE, header = TRUE)
+X2 <- read.csv(file = exp2_file, stringsAsFactors = FALSE, header = TRUE)
 X <- merge(X1, X2, by = "Term_Avg", all = TRUE, suffixes = c("_X1", "_X2"))
 gg <- ggplot(data = X, aes(
@@ -146,7 +106,7 @@ gg <- ggplot(data = X, aes(
  SD_1 = Z_lm_L_SD_X1,
  SD_2 = Z_lm_L_SD_X2
 )) +
-  xlab(paste("GO Term Avg lm Z for", Name1)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
  geom_rect(
    aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2),
    color = "grey20",
@@ -156,12 +116,12 @@ gg <- ggplot(data = X, aes(
    fill = NA
  ) +
  geom_point(shape = 3) +
-  ylab(paste("GO Term Avg lm Z for", Name2)) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
-  ggtitle(paste("Comparing Average GO Term Z lm for", Name1, " vs. ", Name2) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, " vs. ", exp2_name)) +
  theme_Publication_legend_right()
 pdf(
-  file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOntology.pdf")),
+  file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOntology.pdf")),
  width = 12,
  height = 8
 )
@@ -169,7 +129,7 @@ pdf(
 gg
 dev.off()
 pgg <- ggplotly(gg)
-fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_byOntology.html"))
+fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOntology.html"))
 htmlwidgets::saveWidget(pgg, fname)
 # ID aggravators and alleviators, regardless of whether they meet 2SD threshold
@@ -184,19 +144,19 @@ X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm
 X$Overlap_Avg <- NA
 try(X[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <-
-  paste(Name1, "Specific_Deletion_Enhancers", sep = "_"))
+  paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"))
 try(X[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <-
-  paste(Name1, "Specific_Deletion_Suppresors", sep = "_"))
+  paste(exp1_name, "Specific_Deletion_Suppresors", sep = "_"))
 try(X[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <-
-  paste(Name2, "Specific_Deletion_Enhancers", sep = "_"))
+  paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"))
 try(X[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <-
-  paste(Name2, "Specific_Deletion_Suppressors", sep = "_"))
+  paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"))
 try(X[X$Term_Avg %in% Overlap_Aggravators$Term_Avg, ]$Overlap_Avg <- "Overlapping_Deletion_Enhancers")
 try(X[X$Term_Avg %in% Overlap_Alleviators$Term_Avg, ]$Overlap_Avg <- "Overlapping_Deletion_Suppressors")
 try(X[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg, ]$Overlap_Avg <-
-  paste(Name2, "Deletion_Enhancers", Name1, "Deletion_Suppressors", sep = "_"))
+  paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"))
 try(X[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg, ]$Overlap_Avg <-
-  paste(Name2, "Deletion_Suppressors", Name1, "Deletion_Enhancers", sep = "_"))
+  paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_"))
 gg <- ggplot(data = X, aes(
  x = Z_lm_L_Avg_X1,
@@ -209,7 +169,7 @@ gg <- ggplot(data = X, aes(
  SD_1 = Z_lm_L_SD_X1,
  SD_2 = Z_lm_L_SD_X2
 )) +
-  xlab(paste("GO Term Avg lm Z for", Name1)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
  geom_rect(
    aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2),
    color = "grey20",
@@ -219,12 +179,12 @@ gg <- ggplot(data = X, aes(
    fill = NA
  ) +
  geom_point(shape = 3) +
-  ylab(paste("GO Term Avg lm Z for", Name2)) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
-  ggtitle(paste("Comparing Average GO Term Z lm for", Name1, " vs. ", Name2)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, " vs. ", exp2_name)) +
  theme_Publication_legend_right()
 pdf(
-  file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap.pdf")),
+  file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap.pdf")),
  width = 12,
  height = 8
 )
@@ -232,7 +192,7 @@ pdf(
 gg
 dev.off()
 pgg <- ggplotly(gg)
-fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_byOverlap.html"))
+fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap.html"))
 htmlwidgets::saveWidget(pgg, fname)
 x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2, ]
@@ -249,15 +209,15 @@ gg <- ggplot(data = x_rem2_gene, aes(
  SD_1 = Z_lm_L_SD_X1,
  SD_2 = Z_lm_L_SD_X2
 )) +
-  xlab(paste("GO Term Avg lm Z for", Name1)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3) +
-  ylab(paste("GO Term Avg lm Z for", Name2)) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
-  ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
  theme_Publication_legend_right()
 pdf(
-  file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_above2genes.pdf")),
+  file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_above2genes.pdf")),
  width = 12,
  height = 8
 )
@@ -266,7 +226,7 @@ gg
 dev.off()
 pgg <- ggplotly(gg)
 #pgg
-fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_byOverlap_above2genes.html"))
+fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap_above2genes.html"))
 htmlwidgets::saveWidget(pgg, fname)
 #4
@@ -282,15 +242,15 @@ gg <- ggplot(data = X_overlap_nothresold, aes(
  SD_1 = Z_lm_L_SD_X1,
  SD_2 = Z_lm_L_SD_X2
 )) +
-  xlab(paste("GO Term Avg lm Z for", Name1)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3) +
-  ylab(paste("GO Term Avg lm Z for", Name2)) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
-  ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
  theme_Publication_legend_right()
 pdf(
-  file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_Above2SD_ByOverlap.pdf")),
+  file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_Above2SD_ByOverlap.pdf")),
  width = 12,
  height = 8
 )
@@ -299,7 +259,7 @@ gg
 dev.off()
 pgg <- ggplotly(gg)
 #pgg
-fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_Above2SD_ByOverlap.html"))
+fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_Above2SD_ByOverlap.html"))
 htmlwidgets::saveWidget(pgg, fname)
 # Only output GTA terms where average score is still above 2 after subtracting the SD
@@ -320,16 +280,16 @@ X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 &
 X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2), ]
 X$Overlap <- NA
-try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg, ]$Overlap <- paste(Name1, "Specific_Deletion_Enhancers", sep = "_"))
+try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg, ]$Overlap <- paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"))
-try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg, ]$Overlap <- paste(Name1, "Specific_Deletion_Suppresors", sep = "_"))
+try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg, ]$Overlap <- paste(exp1_name, "Specific_Deletion_Suppresors", sep = "_"))
-try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg, ]$Overlap <- paste(Name2, "Specific_Deletion_Enhancers", sep = "_"))
+try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg, ]$Overlap <- paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"))
-try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg, ]$Overlap <- paste(Name2, "Specific_Deletion_Suppressors", sep = "_"))
+try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg, ]$Overlap <- paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"))
 try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg, ]$Overlap <- "Overlapping_Deletion_Enhancers")
 try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg, ]$Overlap <- "Overlapping_Deletion_Suppressors")
 try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg, ]$Overlap <-
-  paste(Name2, "Deletion_Enhancers", Name1, "Deletion_Suppressors", sep = "_"))
+  paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"))
 try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg, ]$Overlap <-
-  paste(Name2, "Deletion_Suppressors", Name1, "Deletion_Enhancers", sep = "_"))
+  paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_"))
 #5
 X_abovethreshold <- X[!(is.na(X$Overlap)), ]
@@ -345,15 +305,15 @@ gg <- ggplot(data = X_abovethreshold, aes(
  SD_1 = Z_lm_L_SD_X1,
  SD_2 = Z_lm_L_SD_X2
 )) +
-  xlab(paste("GO Term Avg lm Z for", Name1)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3) +
-  ylab(paste("GO Term Avg lm Z for", Name2)) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
-  ggtitle(paste("Comparing Average GO Term Z lm for", Name1, " vs. ", Name2)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, " vs. ", exp2_name)) +
  theme_Publication_legend_right()
 pdf(
-  file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold.pdf")),
+  file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold.pdf")),
  width = 12,
  height = 8
 )
@@ -362,7 +322,7 @@ gg
 dev.off()
 pgg <- ggplotly(gg)
 #pgg
-fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold.html"))
+fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold.html"))
 htmlwidgets::saveWidget(pgg, fname)
 #6
@@ -377,16 +337,16 @@ gg <- ggplot(data = X_abovethreshold, aes(
  SD_1 = Z_lm_L_SD_X1,
  SD_2 = Z_lm_L_SD_X2
 )) +
-  xlab(paste("GO Term Avg lm Z for", Name1)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
  geom_text(aes(label = Term_Avg), nudge_y = 0.25, size = 2) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3, size = 3) +
-  ylab(paste("GO Term Avg lm Z for", Name2)) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
-  ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
  theme_Publication_legend_right()
 pdf(
-  file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_names.pdf")),
+  file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_names.pdf")),
  width = 20,
  height = 20
 )
@@ -395,7 +355,7 @@ gg
 dev.off()
 pgg <- ggplotly(gg)
 #pgg
-fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_names.html"))
+fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_names.html"))
 htmlwidgets::saveWidget(pgg, fname)
 X_abovethreshold$X1_Rank <- NA
@@ -415,16 +375,16 @@ gg <- ggplot(data = X_abovethreshold, aes(
  SD_1 = Z_lm_L_SD_X1,
  SD_2 = Z_lm_L_SD_X2
 )) +
-  xlab(paste("GO Term Avg lm Z for", Name1)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
  geom_text(aes(label = X1_Rank), nudge_y = 0.25, size = 4) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3, size = 3) +
-  ylab(paste("GO Term Avg lm Z for", Name2)) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
-  ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
  theme_Publication_legend_right()
 pdf(
-  file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX1.pdf")),
+  file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX1.pdf")),
  width = 15,
  height = 15
 )
@@ -435,7 +395,7 @@ pgg <- ggplotly(gg)
 #pgg
 fname <-
-  file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX1.html"))
+  file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX1.html"))
 htmlwidgets::saveWidget(pgg, fname)
 #8
@@ -450,16 +410,16 @@ gg <- ggplot(data = X_abovethreshold, aes(
  SD_1 = Z_lm_L_SD_X1,
  SD_2 = Z_lm_L_SD_X2
 )) +
-  xlab(paste("GO Term Avg lm Z for", Name1)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
  geom_text(aes(label = X2_Rank), nudge_y = 0.25, size = 4) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3, size = 3) +
-  ylab(paste("GO Term Avg lm Z for", Name2)) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
-  ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
  theme_Publication_legend_right()
 pdf(
-  file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX2.pdf")),
+  file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX2.pdf")),
  width = 15,
  height = 15
 )
@@ -469,18 +429,18 @@ dev.off()
 pgg <- ggplotly(gg)
 #pgg
 fname <-
-  file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX2.html"))
+  file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX2.html"))
 htmlwidgets::saveWidget(pgg, fname)
 write.csv(
  x = X,
-  file.path(pairDirL, paste0("All_GTA_Avg_Scores_", Name1, "_vs_", Name2, ".csv")),
+  file.path(pairDirL, paste0("All_GTA_Avg_Scores_", exp1_name, "_vs_", exp2_name, ".csv")),
  row.names = FALSE
 )
 write.csv(
  x = X_abovethreshold,
-  file = file.path(pairDirL, paste0("AboveThreshold_GTA_Avg_Scores_", Name1, "_vs_", Name2, ".csv")),
+  file = file.path(pairDirL, paste0("AboveThreshold_GTA_Avg_Scores_", exp1_name, "_vs_", exp2_name, ".csv")),
  row.names = FALSE
 )
@@ -582,8 +542,8 @@ scale_colour_Publication <- function(...) {
  )
 }
-X1 <- read.csv(file = input_file1, stringsAsFactors = FALSE, header = TRUE)
+X1 <- read.csv(file = exp1_file, stringsAsFactors = FALSE, header = TRUE)
-X2 <- read.csv(file = input_file2, stringsAsFactors = FALSE, header = TRUE)
+X2 <- read.csv(file = exp2_file, stringsAsFactors = FALSE, header = TRUE)
 #1
 X <- merge(X1, X2, by = "Term_Avg", all = TRUE, suffixes = c("_X1", "_X2"))
@@ -599,15 +559,15 @@ gg <- ggplot(data = X, aes(
  SD_1 = Z_lm_K_SD_X1,
  SD_2 = Z_lm_K_SD_X2
 )) +
-  xlab(paste("GO Term Avg lm Z for", Name1)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3) +
-  ylab(paste("GO Term Avg lm Z for", Name2)) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
-  ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
  theme_Publication_legend_right()
 pdf(
-  file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOntology.pdf")),
+  file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOntology.pdf")),
  width = 12,
  height = 8
 )
@@ -616,7 +576,7 @@ gg
 dev.off()
 pgg <- ggplotly(gg)
 #pgg
-fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_byOntology.html"))
+fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOntology.html"))
 htmlwidgets::saveWidget(pgg, fname)
 #2
@@ -632,23 +592,23 @@ X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_K_Avg_X2 <= -2 & X$Z_lm
 X$Overlap_Avg <- NA
 try(X[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <-
-  paste(Name1, "Specific_Deletion_Suppressors", sep = "_"))
+  paste(exp1_name, "Specific_Deletion_Suppressors", sep = "_"))
 try(X[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <-
-  paste(Name1, "Specific_Deletion_Enhancers", sep = "_"))
+  paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"))
 try(X[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <-
-  paste(Name2, "Specific_Deletion_Suppressors", sep = "_"))
+  paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"))
 try(X[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <-
-  paste(Name2, "Specific_Deletion_Enhancers", sep = "_"))
+  paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"))
 try(X[X$Term_Avg %in% Overlap_Aggravators$Term_Avg, ]$Overlap_Avg <-
  "Overlapping_Deletion_Suppressors")
 try(X[X$Term_Avg %in% Overlap_Alleviators$Term_Avg, ]$Overlap_Avg <-
  "Overlapping_Deletion_Enhancers")
 try(X[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg, ]$Overlap_Avg <-
-  paste(Name2, "Deletion_Suppressors", Name1, "Deletion_Enhancers", sep = "_"))
+  paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_"))
 try(X[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg, ]$Overlap_Avg <-
-  paste(Name2, "Deletion_Enhancers", Name1, "Deletion_Suppressors", sep = "_"))
+  paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"))
-plotly_path <- file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_byOverlap.html"))
+plotly_path <- file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap.html"))
 gg <- ggplot(data = X, aes(
  x = Z_lm_K_Avg_X1,
  y = Z_lm_K_Avg_X2,
@@ -660,15 +620,15 @@ gg <- ggplot(data = X, aes(
  SD_1 = Z_lm_K_SD_X1,
  SD_2 = Z_lm_K_SD_X2
 )) +
-  xlab(paste("GO Term Avg lm Z for", Name1)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3) +
-  ylab(paste("GO Term Avg lm Z for", Name2)) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
-  ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
  theme_Publication_legend_right()
 pdf(
-  file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap.pdf")),
+  file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap.pdf")),
  width = 12,
  height = 8
 )
@@ -678,12 +638,12 @@ dev.off()
 pgg <- ggplotly(gg)
 #2
-fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_byOverlap.html"))
+fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap.html"))
 htmlwidgets::saveWidget(pgg, fname)
 #3
 x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2, ]
-plotly_path <- file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_byOverlap_above2genes.html"))
+plotly_path <- file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap_above2genes.html"))
 gg <- ggplot(data = x_rem2_gene, aes(
  x = Z_lm_K_Avg_X1,
  y = Z_lm_K_Avg_X2,
@@ -695,15 +655,15 @@ gg <- ggplot(data = x_rem2_gene, aes(
  SD_1 = Z_lm_K_SD_X1,
  SD_2 = Z_lm_K_SD_X2
 )) +
-  xlab(paste("GO Term Avg lm Z for", Name1)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3) +
-  ylab(paste("GO Term Avg lm Z for", Name2)) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
-  ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
  theme_Publication_legend_right()
 pdf(
-  file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_above2genes.pdf")),
+  file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_above2genes.pdf")),
  width = 12,
  height = 8
 )
@@ -712,7 +672,7 @@ gg
 dev.off()
 pgg <- ggplotly(gg)
 #pgg
-fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_byOverlap_above2genes.html"))
+fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap_above2genes.html"))
 htmlwidgets::saveWidget(pgg, fname)
 #4
@@ -728,15 +688,15 @@ gg <- ggplot(data = X_overlap_nothresold, aes(
  SD_1 = Z_lm_K_SD_X1,
  SD_2 = Z_lm_K_SD_X2
 )) +
-  xlab(paste("GO Term Avg lm Z for", Name1)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3) +
-  ylab(paste("GO Term Avg lm Z for", Name2)) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
-  ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
  theme_Publication_legend_right()
 pdf(
-  file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_Above2SD_ByOverlap.pdf")),
+  file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_Above2SD_ByOverlap.pdf")),
  width = 12,
  height = 8
 )
@@ -746,7 +706,7 @@ dev.off()
 pgg <- ggplotly(gg)
 #pgg
-fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_Above2SD_ByOverlap.html"))
+fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_Above2SD_ByOverlap.html"))
 htmlwidgets::saveWidget(pgg, fname)
 #5
@@ -769,21 +729,21 @@ X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 &
 X$Overlap <- NA
 try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg, ]$Overlap <-
-  paste(Name1, "Specific_Deletion_Suppressors", sep = "_"))
+  paste(exp1_name, "Specific_Deletion_Suppressors", sep = "_"))
 try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg, ]$Overlap <-
-  paste(Name1, "Specific_Deletion_Enhancers", sep = "_"))
+  paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"))
 try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg, ]$Overlap <-
-  paste(Name2, "Specific_Deletion_Suppressors", sep = "_"))
+  paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"))
 try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg, ]$Overlap <-
-  paste(Name2, "Specific_Deletion_Enhancers", sep = "_"))
+  paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"))
 try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg, ]$Overlap <-
  "Overlapping_Deletion_Suppressors")
 try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg, ]$Overlap <-
  "Overlapping_Deletion_Enhancers")
 try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg, ]$Overlap <-
-  paste(Name2, "Deletion_Suppressors", Name1, "Deletion_Enhancers", sep = "_"))
+  paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_"))
 try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg, ]$Overlap <-
-  paste(Name2, "Deletion_Enhancers", Name1, "Deletion_Suppressors", sep = "_"))
+  paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"))
 X_abovethreshold <- X[!(is.na(X$Overlap)), ]
 gg <- ggplot(data = X_abovethreshold, aes(
@@ -797,15 +757,15 @@ gg <- ggplot(data = X_abovethreshold, aes(
  SD_1 = Z_lm_K_SD_X1,
  SD_2 = Z_lm_K_SD_X2
 )) +
-  xlab(paste("GO Term Avg lm Z for", Name1)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3) +
-  ylab(paste("GO Term Avg lm Z for", Name2)) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
-  ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
  theme_Publication_legend_right()
 pdf(
-  file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold.pdf")),
+  file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold.pdf")),
  width = 12,
  height = 8
 )
@@ -814,7 +774,7 @@ gg
 dev.off()
 pgg <- ggplotly(gg)
 #pgg
-fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold.html"))
+fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold.html"))
 htmlwidgets::saveWidget(pgg, fname)
 #6
@@ -829,16 +789,16 @@ gg <- ggplot(data = X_abovethreshold, aes(
  SD_1 = Z_lm_K_SD_X1,
  SD_2 = Z_lm_K_SD_X2
 )) +
-  xlab(paste("GO Term Avg lm Z for", Name1)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
  geom_text(aes(label = Term_Avg), nudge_y = 0.25, size = 2) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3, size = 3) +
-  ylab(paste("GO Term Avg lm Z for", Name2)) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
-  ggtitle(paste("Comparing Average GO Term Z lm for", Name1, " vs. ", Name2)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, " vs. ", exp2_name)) +
  theme_Publication_legend_right()
 pdf(
-  file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_names.pdf")),
+  file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_names.pdf")),
  width = 20,
  height = 20
 )
@@ -846,7 +806,7 @@ gg
 dev.off()
 pgg <- ggplotly(gg)
 #pgg
-fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_names.html"))
+fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_names.html"))
 htmlwidgets::saveWidget(pgg, fname)
 #7
@@ -866,16 +826,16 @@ gg <- ggplot(data = X_abovethreshold, aes(
  SD_1 = Z_lm_K_SD_X1,
  SD_2 = Z_lm_K_SD_X2
 )) +
-  xlab(paste("GO Term Avg lm Z for", Name1)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
  geom_text(aes(label = X1_Rank), nudge_y = 0.25, size = 4) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3, size = 3) +
-  ylab(paste("GO Term Avg lm Z for", Name2)) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
-  ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
  theme_Publication_legend_right()
 pdf(
-  file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX1.pdf")),
+  file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX1.pdf")),
  width = 15,
  height = 15
 )
@@ -885,7 +845,7 @@ dev.off()
 pgg <- ggplotly(gg)
 #pgg
 fname <-
-  file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX1.html"))
+  file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX1.html"))
 htmlwidgets::saveWidget(pgg, fname)
 #8
@@ -900,16 +860,16 @@ gg <- ggplot(data = X_abovethreshold, aes(
  SD_1 = Z_lm_K_SD_X1,
  SD_2 = Z_lm_K_SD_X2
 )) +
-  xlab(paste("GO Term Avg lm Z for", Name1)) +
+  xlab(paste("GO Term Avg lm Z for", exp1_name)) +
  geom_text(aes(label = X2_Rank), nudge_y = 0.25, size = 4) +
  geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
  geom_point(shape = 3, size = 3) +
-  ylab(paste("GO Term Avg lm Z for", Name2)) +
+  ylab(paste("GO Term Avg lm Z for", exp2_name)) +
-  ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
+  ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
  theme_Publication_legend_right()
 pdf(
-  file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX2.pdf")),
+  file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX2.pdf")),
  width = 15,
  height = 15
 )
@@ -919,17 +879,17 @@ dev.off()
 pgg <- ggplotly(gg)
 #pgg
 fname <-
-  file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX2.html"))
+  file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX2.html"))
 htmlwidgets::saveWidget(pgg, fname)
 write.csv(
  x = X,
-  file = file.path(pairDirK, paste0("All_GTF_Avg_Scores_", Name1, "_vs_", Name2, ".csv")),
+  file = file.path(pairDirK, paste0("All_GTF_Avg_Scores_", exp1_name, "_vs_", exp2_name, ".csv")),
  row.names = FALSE
 )
 write.csv(
  x = X_abovethreshold,
-  file = file.path(pairDirK, paste0("AboveThreshold_GTF_Avg_Scores_", Name1, "_vs_", Name2, ".csv")),
+  file = file.path(pairDirK, paste0("AboveThreshold_GTF_Avg_Scores_", exp1_name, "_vs_", exp2_name, ".csv")),
  row.names = FALSE
 )
--- a/workflow/apps/r/calculate_interaction_zscores.R
+++ b/workflow/apps/r/calculate_interaction_zscores.R
@@ -0,0 +1,335 @@
 suppressMessages({
  if (!require("ggplot2")) stop("Package ggplot2 is required but not installed.")
  if (!require("plotly")) stop("Package plotly is required but not installed.")
  if (!require("htmlwidgets")) stop("Package htmlwidgets is required but not installed.")
  if (!require("dplyr")) stop("Package dplyr is required but not installed.")
  if (!require("ggthemes")) stop("Package ggthemes is required but not installed.")
  if (!require("plyr")) stop("Package plyr is required but not installed.")
 })
 # Constants for configuration
 PLOT_WIDTH <- 14
 PLOT_HEIGHT <- 9
 BASE_SIZE <- 14
 options(warn = 2, max.print = 100)
 parse_arguments <- function() {
  args <- if (interactive()) {
    c(
      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240116_jhartman2_DoxoHLD",
      3,
      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/apps/r/SGD_features.tab",
      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/easy/20240116_jhartman2_DoxoHLD/results_std.txt",
      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp1",
      "Experiment 1: Doxo versus HLD",
      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp2",
      "Experiment 2: HLD versus Doxo",
      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp3",
      "Experiment 3: HLD versus WT",
      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp4",
      "Experiment 4: Doxo versus WT"
    )
  } else {
    commandArgs(trailingOnly = TRUE)
  }
  paths <- normalizePath(file.path(args[seq(5, length(args), by = 2)]), mustWork = FALSE)
  names <- args[seq(6, length(args), by = 2)]
  experiments <- setNames(paths, names)
  list(
    out_dir = normalizePath(file.path(args[1]), mustWork = FALSE),
    sd = as.numeric(args[2]),
    sgd_gene_list = normalizePath(file.path(args[3]), mustWork = FALSE),
    easy_results_file = normalizePath(file.path(args[4]), mustWork = FALSE),
    experiments = experiments
  )
 }
 args <- parse_arguments()
 dir.create(args$out_dir, showWarnings = FALSE)
 # Define themes and scales
 theme_publication <- function(base_size = BASE_SIZE, base_family = "sans") {
  theme_foundation(base_size = base_size, base_family = base_family) +
    theme(
      plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
      text = element_text(),
      panel.background = element_rect(colour = NA),
      plot.background = element_rect(colour = NA),
      panel.border = element_rect(colour = NA),
      axis.title = element_text(face = "bold", size = rel(1)),
      axis.title.y = element_text(angle = 90, vjust = 2),
      axis.title.x = element_text(vjust = -0.2),
      axis.line = element_line(colour = "black"),
      panel.grid.major = element_line(colour = "#f0f0f0"),
      panel.grid.minor = element_blank(),
      legend.key = element_rect(colour = NA),
      legend.position = "bottom",
      legend.direction = "horizontal",
      plot.margin = unit(c(10, 5, 5, 5), "mm"),
      strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
      strip.text = element_text(face = "bold")
    )
 }
 theme_publication_legend_right <- function(base_size = BASE_SIZE, base_family = "sans") {
  theme_publication(base_size, base_family) +
    theme(
      legend.position = "right",
      legend.direction = "vertical",
      legend.key.size = unit(0.5, "cm"),
      legend.spacing = unit(0, "cm"),
      legend.title = element_text(face = "italic")
    )
 }
 scale_fill_publication <- function(...) {
  discrete_scale("fill", "Publication", manual_pal(values = c(
    "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
    "#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
  )), ...)
 }
 scale_colour_publication <- function(...) {
  discrete_scale("colour", "Publication", manual_pal(values = c(
    "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
    "#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
  )), ...)
 }
 # Load SGD gene list
 sgd_genes <- function(sgd_gene_list) {
  read.delim(file = sgd_gene_list, quote = "", header = FALSE,
             colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11))) %>%
    dplyr::rename(ORF = V4, GeneName = V5)
 }
 genes <- sgd_genes(args$sgd_gene_list)
 load_and_preprocess_data <- function(easy_results_file, genes) {
  df <- tryCatch({
    read.delim(easy_results_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE)
  }, error = function(e) {
    stop("Error reading file: ", easy_results_file, "\n", e$message)
  }) %>%
    filter(!.[[1]] %in% c("", "Scan"))  # Fixed syntax
  numeric_columns <- c("Col", "Row", "l", "K", "r", "Scan", "AUC96", "LstBackgrd", "X1stBackgrd")
  df[numeric_columns[numeric_columns %in% colnames(df)]] <- 
    lapply(df[numeric_columns[numeric_columns %in% colnames(df)]], as.numeric)
  df <- df %>%
    mutate(
      L = if ("l" %in% colnames(.)) l else {warning("Missing column: l"); NA},
      AUC = if ("AUC96" %in% colnames(.)) AUC96 else {warning("Missing column: AUC96"); NA},
      conc_num = if ("Conc" %in% colnames(.)) as.numeric(gsub("[^0-9\\.]", "", Conc)) else NA,
      delta_bg = if (all(c("X1stBackgrd", "LstBackgrd") %in% colnames(.)))
        LstBackgrd - X1stBackgrd else {warning("Missing columns for delta_bg calculation"); NA},
      GeneName = vapply(ORF, function(orf) {
        gene_name <- genes %>% filter(ORF == orf) %>% pull(GeneName)
        ifelse(is.null(gene_name) || gene_name == "" || length(gene_name) == 0, orf, gene_name)
      }, character(1))  # Ensures a character vector is returned
    )
  if (nrow(df) == 0) warning("Dataframe is empty after filtering")
  return(df)
 }
 create_and_publish_plot <- function(df, var, plot_type, out_dir_qc, suffix = "") {
  plot_func <- if (plot_type == "scatter") geom_point else geom_boxplot
  filtered_df <- filter(df, is.finite(.data[[var]]))
  p <- ggplot(filtered_df, aes(Scan, .data[[var]], color = as.factor(conc_num))) +
    plot_func(shape = 3, size = 0.2, position = "jitter") +
    stat_summary(fun = mean, geom = "point", size = 0.6) +
    stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), geom = "errorbar") +
    ggtitle(paste("Plate analysis by Drug Conc for", var, "before quality control")) +
    theme_publication()
  pdf_path <- file.path(out_dir_qc, paste0("plate_analysis_", var, suffix, ".pdf"))
  pdf(pdf_path, width = PLOT_WIDTH, height = PLOT_HEIGHT)
  print(p)
  dev.off()
  html_path <- sub(".pdf$", ".html", pdf_path)
  pgg <- suppressWarnings(ggplotly(p, tooltip = c("L", "K", "ORF", "Gene", "delta_bg", "GeneName")) %>%
                            layout(legend = list(orientation = "h")))
  saveWidget(pgg, html_path, selfcontained = TRUE)
 }
 publish_summary_stats <- function(df, variables, out_dir) {
  stats_list <- lapply(variables, function(var) {
    df %>%
      dplyr::group_by(OrfRep, conc_num) %>%
      dplyr::summarize(
        N = dplyr::n(),  # Ensure that the correct version of n() is used
        mean_val = mean(.data[[var]], na.rm = TRUE),
        sd_val = sd(.data[[var]], na.rm = TRUE),
        se_val = sd_val / sqrt(N)
      )
  })
  summary_stats_df <- dplyr::bind_rows(stats_list, .id = "variable")
  write.csv(summary_stats_df, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
 }
 publish_interaction_scores <- function(df, out_dir) {
  interaction_scores <- df %>%
    group_by(OrfRep) %>%
    summarize(
      mean_L = mean(L, na.rm = TRUE),
      mean_K = mean(K, na.rm = TRUE),
      mean_r = mean(r, na.rm = TRUE),
      mean_AUC = mean(AUC, na.rm = TRUE),
      delta_bg_mean = mean(delta_bg, na.rm = TRUE),
      delta_bg_sd = sd(delta_bg, na.rm = TRUE)
    ) %>%
    mutate(
      l_rank = rank(mean_L),
      k_rank = rank(mean_K),
      r_rank = rank(mean_r),
      auc_rank = rank(mean_AUC)
    )
  write.csv(interaction_scores, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE)
  write.csv(arrange(interaction_scores, l_rank, k_rank),
            file.path(out_dir, "rf_zscores_interaction_ranked.csv"), row.names = FALSE)
 }
 publish_zscores <- function(df, out_dir) {
  zscores <- df %>%
    mutate(
      zscore_L = scale(L, center = TRUE, scale = TRUE),
      zscore_K = scale(K, center = TRUE, scale = TRUE),
      zscore_r = scale(r, center = TRUE, scale = TRUE),
      zscore_AUC = scale(AUC, center = TRUE, scale = TRUE)
    )
  write.csv(zscores, file.path(out_dir, "zscores_interaction.csv"), row.names = FALSE)
 }
 generate_and_publish_qc <- function(df, delta_bg_tolerance, out_dir_qc) {
  variables <- c("L", "K", "r", "AUC", "delta_bg")
  lapply(variables, create_and_publish_plot, df = df, plot_type = "scatter", out_dir_qc = out_dir_qc)
  delta_bg_above_tolerance <- filter(df, delta_bg >= delta_bg_tolerance)
  lapply(variables, create_and_publish_plot, df = delta_bg_above_tolerance,
         plot_type = "scatter", out_dir_qc = out_dir_qc, suffix = "_above_tolerance")
 }
 process_exp_dir <- function(exp_dir, exp_name, genes, easy_results_file) {
  out_dir <- file.path(exp_dir, "zscores")
  out_dir_qc <- file.path(exp_dir, "qc")
  dir.create(out_dir, showWarnings = FALSE)
  dir.create(out_dir_qc, showWarnings = FALSE)
  df <- load_and_preprocess_data(easy_results_file, genes)
  delta_bg_tolerance <- mean(df$delta_bg, na.rm = TRUE) + 3 * sd(df$delta_bg, na.rm = TRUE)
  generate_and_publish_qc(df, delta_bg_tolerance, out_dir_qc)
  variables <- c("L", "K", "r", "AUC", "delta_bg")
  publish_summary_stats(df, variables, out_dir)
  publish_interaction_scores(df, out_dir)
  publish_zscores(df, out_dir)
  list(
    zscores_file = file.path(out_dir, "zscores_interaction.csv"),
    exp_name = exp_name
  )
 }
 processed_experiments <- lapply(names(args$experiments), function(exp_name) {
  process_exp_dir(args$experiments[[exp_name]], exp_name, genes, args$easy_results_file)
 })
 zscores_files <- sapply(processed_experiments, `[[`, "zscores_file")
 exp_names <- sapply(processed_experiments, `[[`, "exp_name")
 combine_zscores <- function(zscores_files) {
  if (length(zscores_files) < 2) stop("Not enough experiments to compare, exiting script")
  joined_data <- read.csv(file = zscores_files[1], stringsAsFactors = FALSE)
  for (file in zscores_files[-1]) {
    temp_data <- read.csv(file = file, stringsAsFactors = FALSE)
    joined_data <- plyr::join(joined_data, temp_data, by = "OrfRep", type = "full")
  }
  joined_data
 }
 process_combined_zscores <- function(joined_data, sd, out_dir, exp_names) {
  ordered_data <- joined_data %>%
    select(contains("OrfRep"), matches("Gene"), 
           contains("z_lm_k"), contains("z_shift_k"), 
           contains("z_lm_l"), contains("z_shift_l")) %>%
    arrange(contains("OrfRep"))
  clean_headers <- function(data, suffixes) {
    suffixes_to_remove <- paste0("Gene.", seq_len(suffixes))
    select(data, -all_of(suffixes_to_remove))
  }
  headSel <- clean_headers(ordered_data, length(zscores_files) - 1)
  headSel2 <- clean_headers(select(joined_data, contains("OrfRep"), matches("Gene")), length(zscores_files) - 1)
  fill_na_in_columns <- function(data) {
    for (header in colnames(data)) {
      if (grepl("Shift", header)) {
        data[[header]][is.na(data[[header]])] <- 0.001
      } else if (grepl("Z_lm_", header)) {
        data[[header]][is.na(data[[header]])] <- 0.0001
      }
    }
    data
  }
  headSel <- fill_na_in_columns(headSel)
  filter_by_sd <- function(data, sd) {
    if (sd == 0) return(data)
    z_lm_cols <- select(data, contains("z_lm_"))
    filter_vector <- rowSums(abs(z_lm_cols) >= sd) > 0
    data[filter_vector, ]
  }
  REMcRdy <- filter_by_sd(select(headSel, contains("OrfRep"), matches("Gene"), contains("z_lm_")), sd)
  shiftOnly <- filter_by_sd(select(headSel, contains("OrfRep"), matches("Gene"), contains("z_shift")), sd)
  reorder_columns <- function(data1, data2) {
    combined_data <- data1
    for (i in 3:ncol(data1)) {
      combined_data <- cbind(combined_data, data2[i], data1[i])
    }
    combined_data
  }
  combI <- reorder_columns(headSel2, shiftOnly)
  write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
  write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
  relabel_headers <- function(headers, exp_names) {
    new_labels <- headers
    for (i in seq_along(headers)) {
      suffix <- sub("^.*\\.(\\d+)$", "\\1", headers[i])
      if (suffix %in% seq_along(exp_names)) {
        exp_name <- exp_names[as.numeric(suffix)]
        new_labels[i] <- gsub(paste0(".", suffix), paste0("_", exp_name), headers[i])
      }
    }
    new_labels
  }
  colnames(shiftOnly) <- relabel_headers(colnames(shiftOnly), exp_names)
  colnames(REMcRdy) <- relabel_headers(colnames(REMcRdy), exp_names)
  write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
  write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
 }
 joined_data <- combine_zscores(zscores_files)
 process_combined_zscores(joined_data, args$sd, args$out_dir, exp_names)
--- a/workflow/apps/r/calculate_pairwise_lk.R
+++ b/workflow/apps/r/calculate_pairwise_lk.R
@@ -0,0 +1,209 @@
 suppressMessages({
  library("ggplot2")
  library("plotly")
  library("htmlwidgets")
  library("extrafont")
  library("grid")
  library("ggthemes")
 })
 # Constants for configuration
 PLOT_WIDTH <- 12
 PLOT_HEIGHT <- 8
 BASE_SIZE <- 14
 options(warn = 2, max.print = 100)
 # Parse arguments
 parse_arguments <- function() {
  if (interactive()) {
    args <- c(
      "Exp1",
      "/path/to/exp1_file.csv",
      "Exp2",
      "/path/to/exp2_file.csv",
      "/path/to/output_dir"
    )
  } else {
    args <- commandArgs(trailingOnly = TRUE)
  }
  list(
    exp1_name = args[1],
    exp1_file = normalizePath(args[2], mustWork = TRUE),
    exp2_name = args[3],
    exp2_file = normalizePath(args[4], mustWork = TRUE),
    output_dir = normalizePath(args[5], mustWork = FALSE)
  )
 }
 args <- parse_arguments()
 # Create output directories if they don't exist
 pairDirL <- file.path(args$output_dir, paste0("PairwiseCompareL_", args$exp1_name, "-", args$exp2_name))
 pairDirK <- file.path(args$output_dir, paste0("PairwiseCompareK_", args$exp1_name, "-", args$exp2_name))
 dir.create(pairDirL, showWarnings = FALSE, recursive = TRUE)
 dir.create(pairDirK, showWarnings = FALSE, recursive = TRUE)
 # Define themes and scales
 theme_publication <- function(base_size = BASE_SIZE, base_family = "sans") {
  theme_foundation(base_size = base_size, base_family = base_family) +
    theme(
      plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
      panel.background = element_rect(colour = NA),
      plot.background = element_rect(colour = NA),
      panel.border = element_rect(colour = NA),
      axis.title = element_text(face = "bold", size = rel(1)),
      axis.line = element_line(colour = "black"),
      panel.grid.major = element_line(colour = "#f0f0f0"),
      panel.grid.minor = element_blank(),
      legend.position = "bottom",
      legend.direction = "horizontal",
      legend.key.size = unit(0.2, "cm"),
      plot.margin = unit(c(10, 5, 5, 5), "mm"),
      strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
      strip.text = element_text(face = "bold")
    )
 }
 theme_publication_legend_right <- function(base_size = BASE_SIZE, base_family = "sans") {
  theme_publication(base_size, base_family) +
    theme(
      legend.position = "right",
      legend.direction = "vertical",
      legend.key.size = unit(0.5, "cm")
    )
 }
 scale_fill_publication <- function(...) {
  discrete_scale("fill", "Publication", manual_pal(values = c(
    "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
    "#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
  )), ...)
 }
 scale_colour_publication <- function(...) {
  discrete_scale("colour", "Publication", manual_pal(values = c(
    "#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
    "#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
  )), ...)
 }
 # Read and merge data
 load_and_merge_data <- function(file1, file2) {
  df1 <- read.csv(file = file1, stringsAsFactors = FALSE, header = TRUE)
  df2 <- read.csv(file = file2, stringsAsFactors = FALSE, header = TRUE)
  merge(df1, df2, by = "Term_Avg", all = TRUE, suffixes = c("_df1", "_df2"))
 }
 # Generate a plot and save to PDF and HTML
 generate_plot <- function(data, x_var, y_var, color_var, title, file_path, theme_function) {
  gg <- ggplot(data = data, aes_string(
    x = x_var,
    y = y_var,
    color = color_var
  )) +
    xlab(paste("GO Term Avg lm Z for", args$exp1_name)) +
    geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
    geom_point(shape = 3) +
    ylab(paste("GO Term Avg lm Z for", args$exp2_name)) +
    ggtitle(title) +
    theme_function()
  pdf(file.path(file_path, ".pdf"), width = PLOT_WIDTH, height = PLOT_HEIGHT)
  print(gg)
  dev.off()
  pgg <- ggplotly(gg)
  htmlwidgets::saveWidget(pgg, file.path(file_path, ".html"))
 }
 # Identify and annotate specific interactions
 annotate_interactions <- function(df, exp1_name, exp2_name, suffix) {
  df$Overlap_Avg <- NA
  interactions <- list(
    "df1_Specific_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] < 2),
    "df1_Specific_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] > -2),
    "df2_Specific_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] < 2),
    "df2_Specific_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] > -2),
    "Overlap_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] >= 2),
    "Overlap_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] <= -2),
    "df2_Aggravators_df1_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] <= -2),
    "df2_Alleviators_df1_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] >= 2)
  )
  annotation <- list(
    df1_Specific_Aggravators = paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"),
    df1_Specific_Alleviators = paste(exp1_name, "Specific_Deletion_Suppressors", sep = "_"),
    df2_Specific_Aggravators = paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"),
    df2_Specific_Alleviators = paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"),
    Overlap_Aggravators = "Overlapping_Deletion_Enhancers",
    Overlap_Alleviators = "Overlapping_Deletion_Suppressors",
    df2_Aggravators_df1_Alleviators = paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"),
    df2_Alleviators_df1_Aggravators = paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_")
  )
  for (key in names(interactions)) {
    try(df$Overlap_Avg[interactions[[key]]] <- annotation[[key]])
  }
  df
 }
 # Rank and filter data
 rank_and_filter_data <- function(df, suffix) {
  z1 <- df
  z1[[paste0("L_Subtract_SD_", suffix, "_df1")]] <- z1[[paste0("Z_lm_", suffix, "_Avg_df1")]] - z1[[paste0("Z_lm_", suffix, "_SD_df1")]]
  z1[[paste0("L_Subtract_SD_", suffix, "_df2")]] <- z1[[paste0("Z_lm_", suffix, "_Avg_df2")]] - z1[[paste0("Z_lm_", suffix, "_SD_df2")]]
  z2 <- df
  z2[[paste0("L_Subtract_SD_", suffix, "_df1")]] <- z2[[paste0("Z_lm_", suffix, "_Avg_df1")]] + z2[[paste0("Z_lm_", suffix, "_SD_df1")]]
  z2[[paste0("L_Subtract_SD_", suffix, "_df2")]] <- z2[[paste0("Z_lm_", suffix, "_Avg_df2")]] + z2[[paste0("Z_lm_", suffix, "_SD_df2")]]
  df_above_threshold <- df[!is.na(df$Overlap_Avg), ]
  df_above_threshold$df1_Rank <- rank(-df_above_threshold[[paste0("Z_lm_", suffix, "_Avg_df1")]], ties.method = "random")
  df_above_threshold$df2_Rank <- rank(-df_above_threshold[[paste0("Z_lm_", suffix, "_Avg_df2")]], ties.method = "random")
  list(z1 = z1, z2 = z2, df_above_threshold = df_above_threshold)
 }
 # Main execution function for a pairwise comparison
 run_pairwise_comparison <- function(suffix, dir) {
  df <- load_and_merge_data(args$exp1_file, args$exp2_file)
  # Generate initial ontology-based plot
  generate_plot(df,
    paste0("Z_lm_", suffix, "_Avg_df1"), paste0("Z_lm_", suffix, "_Avg_df2"), "Ontology_Avg_df1",
    paste("Comparing Average GO Term Z lm for", args$exp1_name, "vs.", args$exp2_name),
    file.path(dir, paste0("Scatter_lm_GTA_Averages_", args$exp1_name, "_vs_", args$exp2_name, "_All_ByOntology")),
    theme_publication_legend_right)
  # Annotate interactions and generate overlap-based plot
  df <- annotate_interactions(df, args$exp1_name, args$exp2_name, suffix)
  ranks <- rank_and_filter_data(df, suffix)
  generate_plot(df,
    paste0("Z_lm_", suffix, "_Avg_df1"),
    paste0("Z_lm_", suffix, "_Avg_df2"),
    "Overlap_Avg",
    paste("Comparing Average GO Term Z lm for", args$exp1_name, "vs.", args$exp2_name),
    file.path(dir, paste0("Scatter_lm_GTA_Averages_", args$exp1_name, "_vs_", args$exp2_name, "_All_ByOverlap")),
    theme_publication_legend_right)
  generate_plot(ranks$df_above_threshold,
    paste0("Z_lm_", suffix, "_Avg_df1"),
    paste0("Z_lm_", suffix, "_Avg_df2"),
    "Overlap_Avg",
    paste("Comparing Average GO Term Z lm for", args$exp1_name, "vs.", args$exp2_name, "Above Threshold"),
    file.path(dir,
      paste0("Scatter_lm_GTA_Averages_", args$exp1_name, "_vs_", args$exp2_name, "_All_ByOverlap_AboveThreshold")),
    theme_publication_legend_right)
  # Save CSV files
  write.csv(df, file.path(dir,
    paste0("All_GTA_Avg_Scores_", args$exp1_name, "_vs_", args$exp2_name, ".csv")),
    row.names = FALSE)
  write.csv(ranks$df_above_threshold,
    file.path(dir, paste0("AboveThreshold_GTA_Avg_Scores_", args$exp1_name, "_vs_", args$exp2_name, ".csv")),
    row.names = FALSE)
 }
 # Execute Pairwise L and Pairwise K comparisons
 run_pairwise_comparison("L", pairDirL)
 run_pairwise_comparison("K", pairDirK)
--- a/workflow/apps/r/interactions.R
+++ b/workflow/apps/r/interactions.R
@@ -14,7 +14,7 @@ BASE_SIZE <- 14
 options(warn = 2, max.print = 100)
-# Function to parse arguments
+# Parse arguments
 parse_arguments <- function() {
  if (interactive()) {
    args <- c(
@@ -92,7 +92,7 @@ scale_colour_publication <- function(...) {
  )), ...)
 }
-# Function to load and preprocess data
+# Load and preprocess data
 load_and_preprocess_data <- function(input_file) {
  df <- tryCatch({
    read.delim(input_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE)
@@ -124,7 +124,7 @@ df <- df %>%
  mutate(OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep)) %>%
    filter(!Gene %in% c("BLANK", "Blank", "blank"), Drug != "BMH21")
-# Function to create plot
+# Create plot
 create_plot <- function(df, var, plot_type) {
  filtered_df <- df %>% filter(is.finite(.data[[var]]))
  p <- ggplot(filtered_df, aes(Scan, .data[[var]], color = as.factor(conc_num))) +
@@ -143,7 +143,7 @@ create_plot <- function(df, var, plot_type) {
  return(p)
 }
-# Function to publish plot to PDF and HTML (Plotly)
+# Publish plot to PDF and HTML (Plotly)
 publish_plot <- function(plot, plot_path) {
  # if (file.exists(plot_path)) {
  #   file.rename(plot_path, paste0(plot_path, BACKUP_SUFFIX))
@@ -159,7 +159,7 @@ publish_plot <- function(plot, plot_path) {
  saveWidget(pgg, sub(".pdf$", ".html", plot_path), selfcontained = TRUE)
 }
-# Function to publish multiple plots
+# Publish multiple plots
 publish_multiple_plots <- function(df, variables, plot_type, out_dir_qc, suffix = "") {
  lapply(variables, function(var) {
    plot <- create_plot(df, var, plot_type)
@@ -167,7 +167,7 @@ publish_multiple_plots <- function(df, variables, plot_type, out_dir_qc, suffix
  })
 }
-# Function to calculate and publish summary statistics
+# Calculate and publish summary statistics
 publish_summary_stats <- function(df, variables, out_dir) {
  stats_list <- lapply(variables, function(var) {
    df %>%
@@ -183,7 +183,7 @@ publish_summary_stats <- function(df, variables, out_dir) {
  write.csv(summary_stats_df, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
 }
-# Function to calculate and publish interaction scores
+# Calculate and publish interaction scores
 publish_interaction_scores <- function(df, out_dir) {
  interaction_scores <- df %>%
    group_by(OrfRep) %>%
@@ -207,7 +207,7 @@ publish_interaction_scores <- function(df, out_dir) {
    arrange(l_rank, k_rank), file.path(out_dir, "rf_zscores_interaction_ranked.csv"), row.names = FALSE)
 }
-# Function to publish z-scores
+# Publish z-scores
 publish_zscores <- function(df, out_dir) {
  zscores <- df %>%
    mutate(
--- a/workflow/apps/r/joinInteractExps.R
+++ b/workflow/apps/r/joinInteractExps.R
@@ -1,238 +1,375 @@
 #!/usr/bin/env Rscript
 # JoinInteractExps.R
 library("plyr")
 library("sos")
 library("dplyr")
-args <- commandArgs(TRUE)
+# Function to parse and set arguments
-
+parse_arguments <- function() {
-# Set output dir
+  if (interactive()) {
-if (length(args) >= 1) {
+    args <- c(
-  out_dir <- file.path(args[1])
+      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD",
-} else {
+      2,
-  out_dir <- "./" # for legacy workflow
+      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/StudyInfo.csv",
-}
+      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/Exp1", 
-
+      "/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/Exp2"
-# Set sd value
+    )
-if (length(args) >= 2) {
+  } else {
-  sd <- as.numeric(args[2])
+    args <- commandArgs(trailingOnly = TRUE)
 } else {
  sd <- 2 # default value
 }
 sprintf("SD value is: %d", sd)
 # Set study_info file
 if (length(args) >= 3) {
  study_info <- file.path(args[3])
 } else {
  study_info <- "../Code/StudyInfo.csv" # for legacy workflow
 }
 studies <- args[4:length(args)]
 print(studies)
 input_files <- c()
 for (i in seq_along(studies)) {
  study <- studies[i]
  zs_file <- file.path(study, "zscores", "zscores_interaction.csv")
  if (file.exists(zs_file)) {
    input_files[i] <- zs_file
  }
 }
 rm(zs_file, study)
-# for (var in ls()) {
+  list(
-#   print(paste(var, ":", get(var)))
+    out_dir = normalizePath(file.path(args[1]), mustWork = FALSE),
    sd = as.numeric(args[2]),
    study_info = normalizePath(file.path(args[3]), mustWork = FALSE),
    input_dirs = args[4:length(args)]
  )
 }
 args <- parse_arguments()
 # Create an array for the zscores files
 get_zscores_files <- function(dirs) {
  files <- sapply(dirs, function(study) {
    file_path <- file.path(study, "zscores", "zscores_interaction.csv")
    if (file.exists(file_path)) file_path else NULL
  }, simplify = TRUE, USE.NAMES = FALSE)
  # Filter out NULL entries
  files[!sapply(files, is.null)]
 }
 zscores_files <- get_zscores_files(args$input_dirs)
 sprintf("The SD value is: %d", args$sd)
 # Ensure there are enough files to compare
 if (length(zscores_files) < 2) stop("Not enough experiments to compare, exiting script")
 # Function to join zscores files
 join_zscores_files <- function(files) {
  joined_data <- read.csv(file = files[1], stringsAsFactors = FALSE)
  for (file in files[-1]) {
    temp_data <- read.csv(file = file, stringsAsFactors = FALSE)
    joined_data <- join(joined_data, temp_data, by = "OrfRep")
  }
  joined_data
 }
 # Load and join zscores files
 joined_data <- join_zscores_files(zscores_files)
 # Order and select columns
 order_and_select_columns <- function(data) {
  ordered_data <- data[, order(colnames(data))]
  selected_headers <- select(ordered_data,
    contains("OrfRep"), matches("Gene"),
    contains("z_lm_k"), contains("z_shift_k"),
    contains("z_lm_l"), contains("z_shift_l"))
  selected_headers
 }
 selected_headers <- order_and_select_columns(joined_data)
 # Remove redundant columns like "Gene.1"
 clean_headers <- function(data, suffixes) {
  suffixes_to_remove <- paste0("Gene.", seq_len(suffixes))
  select(data, -all_of(suffixes_to_remove))
 }
 headSel <- clean_headers(selected_headers, length(zscores_files) - 1)
 headSel2 <- clean_headers(select(joined_data, contains("OrfRep"), matches("Gene")), length(zscores_files) - 1)
 # Fill NA values in Shift and Z_lm columns
 fill_na_in_columns <- function(data) {
  for (header in colnames(data)) {
    if (grepl("Shift", header)) {
      data[[header]][is.na(data[[header]])] <- 0.001
    } else if (grepl("Z_lm_", header)) {
      data[[header]][is.na(data[[header]])] <- 0.0001
    }
  }
  data
 }
 headSel <- fill_na_in_columns(headSel)
 # Filter based on standard deviation
 filter_by_sd <- function(data, sd) {
  if (sd == 0) return(data)
  z_lm_cols <- select(data, contains("z_lm_"))
  filter_vector <- rowSums(abs(z_lm_cols) >= sd) > 0
  data[filter_vector, ]
 }
 REMcRdy <- filter_by_sd(select(headSel, contains("OrfRep"), matches("Gene"), contains("z_lm_")), args$sd)
 shiftOnly <- filter_by_sd(select(headSel, contains("OrfRep"), matches("Gene"), contains("z_shift")), args$sd)
 # Reorder columns to interleave Z_lm and Shift data
 reorder_columns <- function(data1, data2) {
  combined_data <- data1
  for (i in 3:ncol(data1)) {
    combined_data <- cbind(combined_data, data2[i], data1[i])
  }
  combined_data
 }
 combI <- reorder_columns(headSel2, shiftOnly)
 # Write output files
 write.csv(REMcRdy, file.path(args$out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
 write.csv(shiftOnly, file.path(args$out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
 # Relabel headers using experiment names from StudyInfo.csv
 relabel_headers <- function(headers, labels) {
  new_labels <- headers
  for (i in seq_along(headers)) {
    suffix <- sub("^.*\\.(\\d+)$", "\\1", headers[i])
    if (suffix %in% 1:3) {
      exp_name <- labels[as.numeric(suffix), 2]
      new_labels[i] <- gsub(paste0(".", suffix), paste0("_", exp_name), headers[i])
    }
  }
  new_labels
 }
 LabelStd <- read.csv(file = args$study_info, stringsAsFactors = FALSE)
 colnames(shiftOnly) <- relabel_headers(colnames(shiftOnly), LabelStd)
 colnames(REMcRdy) <- relabel_headers(colnames(REMcRdy), LabelStd)
 # Save relabeled files
 write.csv(REMcRdy, file.path(args$out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
 write.csv(shiftOnly, file.path(args$out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
 # Save updated parameters
 LabelStd[, 4] <- args$sd
 write.csv(LabelStd, file.path(args$out_dir, "parameters.csv"), row.names = FALSE)
 write.csv(LabelStd, file = args$study_info, row.names = FALSE)
 # library("plyr")
 # library("sos")
 # library("dplyr")
 # # Function to parse and set arguments
 # parse_arguments <- function() {
 #   if (interactive()) {
 #     args <- c(
 #       "./",                          # Default out_dir
 #       "2",                           # Default SD value
 #       "../Code/StudyInfo.csv"        # Default study_info path
 #     )
 #   } else {
 #     args <- commandArgs(trailingOnly = TRUE)
 #   }
-# print(input_files)
+#   list(
-# print(length(input_files))
+#     out_dir = normalizePath(file.path(args[1]), mustWork = FALSE),
 #     sd = as.numeric(args[2]),
 #     study_info = normalizePath(file.path(args[3]), mustWork = FALSE),
 #     input_dirs = args[4:length(args)]
 #   )
 # }
-# TODO this is better handled in a loop in case you want to compare more experiments?
+# args <- parse_arguments()
 # The input is already  designed for this
 # Read in the files for your experiment and
 # Join the two files at a time as a function of how many inputFile
 # list the larger file first ? in this example X2 has the larger number of genes
 # If X1 has a larger number of genes, switch the order of X1 and X2
 if (length(input_files) < 2) {
  print("Note enough Exps to compare, skipping join")
  stop("Exiting script")
 }
-if (length(input_files) >= 2) {
+# # Create an array for the zscores files
-  X1 <- read.csv(file = input_files[1], stringsAsFactors = FALSE)
+# zscores_files <- sapply(args$input_dirs, function(study) {
-  X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE)
+#   file_path <- file.path(study, "zscores", "zscores_interaction.csv")
-  X <- join(X1, X2, by = "OrfRep")
+#     if (file.exists(file_path)) file_path else NULL
-  OBH <- X[, order(colnames(X))]  # OrderByHeader
+#     }, simplify = TRUE, USE.NAMES = FALSE)
  headers <- select(OBH, contains("OrfRep"), matches("Gene"),
    contains("z_lm_k"), contains("z_shift_k"), contains("z_lm_l"), contains("z_shift_l"))
  headSel <- select(headers, -"Gene.1") # remove "Gene.1 column
  headSel2 <- select(OBH,  contains("OrfRep"), matches("Gene")) # frame for interleaving Z_lm with Shift colums
  headSel2 <- select(headSel2, -"Gene.1") # remove "Gene.1 column  # frame for interleaving Z_lm with Shift colums
 }
-if (length(input_files) >= 3) {
+# # Filter out NULL entries
-  X3 <- read.csv(file = input_files[3], stringsAsFactors = FALSE)
+# zscores_files <- zscores_files[!sapply(zscores_files, is.null)]
  X <- join(X, X3, by = "OrfRep")
  headSel <- select(headers, -"Gene.1", -"Gene.2")
  headSel2 <- select(OBH,  contains("OrfRep"), matches("Gene"))
  headSel2 <- select(headSel2, -"Gene.1", -"Gene.2")
 }
-if (length(input_files) >= 4) {
+# sprintf("The SD value is: %d", args$sd)
-  X4 <- read.csv(file = input_files[4], stringsAsFactors = FALSE)
+# # print(args$input_dirs)
  X <- join(X, X4, by = "OrfRep")
  headSel <- select(headers, -"Gene.1", -"Gene.2", -"Gene.3")
  headSel2 <- select(OBH,  contains("OrfRep"), matches("Gene"))
  headSel2 <- select(headSel2, -"Gene.1", -"Gene.2", -"Gene.3")
 }
-# print(headers)
+# # TODO this is better handled in a loop in case you want to compare more experiments?
-# headSel$contains("Z_Shift") %>% replace_na(0.001)
+# # The input is already  designed for this
-headers <- colnames(headSel)
+# # Read in the files for your experiment and
-# print(headers)
+# # Join the two files at a time as a function of how many inputFile
-i <- 0
+# # list the larger file first ? in this example X2 has the larger number of genes
-for (i in 1:length(headers)) {
+# # If X1 has a larger number of genes, switch the order of X1 and X2
-  if (grepl("Shift", headers[i])) {
+# if (length(zscores_files) < 2) {
-    headSel[headers[i]][is.na(headSel[headers[i]])] <- 0.001
+#   print("Note enough Exps to compare, skipping join")
-  }
+#   stop("Exiting script")
-  if (grepl("Z_lm_", headers[i])) {
+# }
    headSel[headers[i]][is.na(headSel[headers[i]])] <- 0.0001
  }
 }
-# 2SD option code to exclude Z_lm values less than 2 standard Deviations
+# if (length(zscores_files) >= 2) {
-REMcRdy <- select(headSel,  contains("OrfRep"), matches("Gene"), contains("z_lm_"))
+#   X1 <- read.csv(file = zscores_files[1], stringsAsFactors = FALSE)
-shiftOnly <- select(headSel,  contains("OrfRep"), matches("Gene"), contains("z_shift"))
+#   X2 <- read.csv(file = zscores_files[2], stringsAsFactors = FALSE)
 #   X <- join(X1, X2, by = "OrfRep")
 #   OBH <- X[, order(colnames(X))]  # OrderByHeader
 #   headers <- select(OBH, contains("OrfRep"), matches("Gene"),
 #     contains("z_lm_k"), contains("z_shift_k"), contains("z_lm_l"), contains("z_shift_l"))
 #   headSel <- select(headers, -"Gene.1") # remove "Gene.1 column
 #   headSel2 <- select(OBH,  contains("OrfRep"), matches("Gene")) # frame for interleaving Z_lm with Shift colums
 #   headSel2 <- select(headSel2, -"Gene.1") # remove "Gene.1 column  # frame for interleaving Z_lm with Shift colums
 # }
-# Code to replace the numeric (.1 .2 .3) headers with experiment names from StudyInfo.txt
+# if (length(zscores_files) >= 3) {
-Labels <- read.csv(file = study_info, stringsAsFactors = FALSE, sep = ",")
+#   X3 <- read.csv(file = zscores_files[3], stringsAsFactors = FALSE)
 #   X <- join(X, X3, by = "OrfRep")
 #   headSel <- select(headers, -"Gene.1", -"Gene.2")
 #   headSel2 <- select(OBH,  contains("OrfRep"), matches("Gene"))
 #   headSel2 <- select(headSel2, -"Gene.1", -"Gene.2")
 # }
-# Using Text search grepl to relabel headers
+# if (length(zscores_files) >= 4) {
-REMcRdyHdr <- colnames(REMcRdy)
+#   X4 <- read.csv(file = zscores_files[4], stringsAsFactors = FALSE)
-REMcRdyLabels <- "asdf"
+#   X <- join(X, X4, by = "OrfRep")
-shftHdr <- colnames(shiftOnly)
+#   headSel <- select(headers, -"Gene.1", -"Gene.2", -"Gene.3")
-shiftLabels <- "asdf"
+#   headSel2 <- select(OBH,  contains("OrfRep"), matches("Gene"))
-shiftLabels[1:2] <- shftHdr[1:2]
+#   headSel2 <- select(headSel2, -"Gene.1", -"Gene.2", -"Gene.3")
-REMcRdyLabels[1:2] <- REMcRdyHdr[1:2]
+# }
-for (i in 3:(length(shftHdr))) {
+# # print(headers)
-  if (i == 3) {
+# # headSel$contains("Z_Shift") %>% replace_na(0.001)
-    shiftLabels[3] <- paste0(Labels[1, 2], ".", shftHdr[3])
+# headers <- colnames(headSel)
-    REMcRdyLabels[3] <- paste0(Labels[1, 2], ".", REMcRdyHdr[3])
+# # print(headers)
-  }
+# i <- 0
-  if (i == 5) {
+# for (i in 1:length(headers)) {
-    shiftLabels[5] <- paste0(Labels[1, 2], ".", shftHdr[5])
+#   if (grepl("Shift", headers[i])) {
-    REMcRdyLabels[5] <- paste0(Labels[1, 2], ".", REMcRdyHdr[5])
+#     headSel[headers[i]][is.na(headSel[headers[i]])] <- 0.001
-  }
+#   }
-  if (i == 7) {
+#   if (grepl("Z_lm_", headers[i])) {
-    shiftLabels[7] <- paste0(Labels[1, 2], ".", shftHdr[7])
+#     headSel[headers[i]][is.na(headSel[headers[i]])] <- 0.0001
-    REMcRdyLabels[7] <- paste0(Labels[1, 2], ".", REMcRdyHdr[7])
+#   }
-  }
+# }
  if (grepl(".1", shftHdr[i], fixed = true)) {
    shiftLabels[i] <- paste0(Labels[2, 2], ".", shftHdr[i])
    REMcRdyLabels[i] <- paste0(Labels[2, 2], ".", REMcRdyHdr[i])
  }
  if (grepl(".2", shftHdr[i], fixed = true)) {
    shiftLabels[i] < -paste0(Labels[3, 2], ".", shftHdr[i])
    REMcRdyLabels[i] <- paste0(Labels[3, 2], ".", REMcRdyHdr[i])
  }
  if (grepl(".3", shftHdr[i], fixed = true)) {
    shiftLabels[i] <- paste0(Labels[4, 2], ".", shftHdr[i])
    REMcRdyLabels[i] <- paste0(Labels[4, 2], ".", REMcRdyHdr[i])
  }
 }
-for (i in 3:(length(REMcRdyLabels))) {
+# # 2SD option code to exclude Z_lm values less than 2 standard Deviations
-  j <- as.integer(i)
+# REMcRdy <- select(headSel,  contains("OrfRep"), matches("Gene"), contains("z_lm_"))
-  REMcRdyLabels[j] <- gsub("[.]", "_", REMcRdyLabels[j])
+# shiftOnly <- select(headSel,  contains("OrfRep"), matches("Gene"), contains("z_shift"))
  shiftLabels[j] <- gsub("[.]", "_", shiftLabels[j])
 }
-colnames(shiftOnly) <- shiftLabels
+# # Code to replace the numeric (.1 .2 .3) headers with experiment names from StudyInfo.txt
-colnames(REMcRdy) <- REMcRdyLabels
+# Labels <- read.csv(file = study_info, stringsAsFactors = FALSE, sep = ",")
-combI <- headSel2 # starting Template orf, Genename columns
+# # Using Text search grepl to relabel headers
 # REMcRdyHdr <- colnames(REMcRdy)
 # REMcRdyLabels <- "asdf"
 # shftHdr <- colnames(shiftOnly)
 # shiftLabels <- "asdf"
 # shiftLabels[1:2] <- shftHdr[1:2]
 # REMcRdyLabels[1:2] <- REMcRdyHdr[1:2]
-# headersRemc<-colnames(REMcRdy)
+# for (i in 3:(length(shftHdr))) {
-# Reorder columns to produce an interleaved set of Z_lm and Shift data for all the cpps.
+#   if (i == 3) {
-for (i in 3:length(colnames(REMcRdy))) {
+#     shiftLabels[3] <- paste0(Labels[1, 2], ".", shftHdr[3])
-  combI <- cbind.data.frame(combI, shiftOnly[i])
+#     REMcRdyLabels[3] <- paste0(Labels[1, 2], ".", REMcRdyHdr[3])
-  combI <- cbind.data.frame(combI, REMcRdy[i])
+#   }
-}
+#   if (i == 5) {
 #     shiftLabels[5] <- paste0(Labels[1, 2], ".", shftHdr[5])
 #     REMcRdyLabels[5] <- paste0(Labels[1, 2], ".", REMcRdyHdr[5])
 #   }
 #   if (i == 7) {
 #     shiftLabels[7] <- paste0(Labels[1, 2], ".", shftHdr[7])
 #     REMcRdyLabels[7] <- paste0(Labels[1, 2], ".", REMcRdyHdr[7])
 #   }
 #   if (grepl(".1", shftHdr[i], fixed = true)) {
 #     shiftLabels[i] <- paste0(Labels[2, 2], ".", shftHdr[i])
 #     REMcRdyLabels[i] <- paste0(Labels[2, 2], ".", REMcRdyHdr[i])
 #   }
 #   if (grepl(".2", shftHdr[i], fixed = true)) {
 #     shiftLabels[i] < -paste0(Labels[3, 2], ".", shftHdr[i])
 #     REMcRdyLabels[i] <- paste0(Labels[3, 2], ".", REMcRdyHdr[i])
 #   }
 #   if (grepl(".3", shftHdr[i], fixed = true)) {
 #     shiftLabels[i] <- paste0(Labels[4, 2], ".", shftHdr[i])
 #     REMcRdyLabels[i] <- paste0(Labels[4, 2], ".", REMcRdyHdr[i])
 #   }
 # }
-Vec1 <- NA
+# for (i in 3:(length(REMcRdyLabels))) {
-Vec2 <- NA
+#   j <- as.integer(i)
-Vec3 <- NA
+#   REMcRdyLabels[j] <- gsub("[.]", "_", REMcRdyLabels[j])
-Vec4 <- NA
+#   shiftLabels[j] <- gsub("[.]", "_", shiftLabels[j])
-Vec5 <- NA
+# }
 Vec6 <- NA
 Vec7 <- NA
 Vec8 <- NA
-if (length(REMcRdy) == 6) {
+# colnames(shiftOnly) <- shiftLabels
-  Vec1 <- abs(REMcRdy[, 3]) >= sd
+# colnames(REMcRdy) <- REMcRdyLabels
  Vec2 <- abs(REMcRdy[, 4]) >= sd
  Vec3 <- abs(REMcRdy[, 5]) >= sd
  Vec4 <- abs(REMcRdy[, 6]) >= sd
  bolVec <- Vec1 | Vec2 | Vec3 | Vec4
  REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
  REMcRdyGT2[, 3:6] <- REMcRdy[bolVec, 3:6]
  shiftOnlyGT2 <- shiftOnly[bolVec, 1:2]
  shiftOnlyGT2[, 3:6] <- shiftOnly[bolVec, 3:6]
 }
-if (length(REMcRdy) == 8) {
+# combI <- headSel2 # starting Template orf, Genename columns
  Vec1 <- abs(REMcRdy[, 3]) >= sd
  Vec2 <- abs(REMcRdy[, 4]) >= sd
  Vec3 <- abs(REMcRdy[, 5]) >= sd
  Vec4 <- abs(REMcRdy[, 6]) >= sd
  Vec5 <- abs(REMcRdy[, 7]) >= sd
  Vec6 <- abs(REMcRdy[, 8]) >= sd
  bolVec <- Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6
  REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
  REMcRdyGT2[, 3:8] <- REMcRdy[bolVec, 3:8]
  shiftOnlyGT2 <- shiftOnly[bolVec, 1:2]
  shiftOnlyGT2[, 3:8] <- shiftOnly[bolVec, 3:8]
 }
-if (length(REMcRdy) == 10) {
+# # headersRemc<-colnames(REMcRdy)
-  Vec1 <- abs(REMcRdy[, 3]) >= sd
+# # Reorder columns to produce an interleaved set of Z_lm and Shift data for all the cpps.
-  Vec2 <- abs(REMcRdy[, 4]) >= sd
+# for (i in 3:length(colnames(REMcRdy))) {
-  Vec3 <- abs(REMcRdy[, 5]) >= sd
+#   combI <- cbind.data.frame(combI, shiftOnly[i])
-  Vec4 <- abs(REMcRdy[, 6]) >= sd
+#   combI <- cbind.data.frame(combI, REMcRdy[i])
-  Vec5 <- abs(REMcRdy[, 7]) >= sd
+# }
  Vec6 <- abs(REMcRdy[, 8]) >= sd
  Vec7 <- abs(REMcRdy[, 9]) >= sd
  Vec8 <- abs(REMcRdy[, 10]) >= sd
  bolVec <- Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6 | Vec7 | Vec8
  REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
  REMcRdyGT2[, 3:10] <- REMcRdy[bolVec, 3:10]
  shiftOnlyGT2 <- shiftOnly[bolVec, 1:2]
  shiftOnlyGT2[, 3:10] <- shiftOnly[bolVec, 3:10]
 }
-if (sd != 0) {
+# Vec1 <- NA
-  REMcRdy <- REMcRdyGT2  # [,2:length(REMcRdyGT2)]
+# Vec2 <- NA
-  shiftOnly <- shiftOnlyGT2  # [,2:length(shiftOnlyGT2)]
+# Vec3 <- NA
-}
+# Vec4 <- NA
 # Vec5 <- NA
 # Vec6 <- NA
 # Vec7 <- NA
 # Vec8 <- NA
-if (sd == 0) {
+# if (length(REMcRdy) == 6) {
-  REMcRdy <- REMcRdy # [,2:length(REMcRdy)]
+#   Vec1 <- abs(REMcRdy[, 3]) >= sd
-  shiftOnly <- shiftOnly # [,2:length(shiftOnly)]
+#   Vec2 <- abs(REMcRdy[, 4]) >= sd
-}
+#   Vec3 <- abs(REMcRdy[, 5]) >= sd
 #   Vec4 <- abs(REMcRdy[, 6]) >= sd
 #   bolVec <- Vec1 | Vec2 | Vec3 | Vec4
 #   REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
 #   REMcRdyGT2[, 3:6] <- REMcRdy[bolVec, 3:6]
 #   shiftOnlyGT2 <- shiftOnly[bolVec, 1:2]
 #   shiftOnlyGT2[, 3:6] <- shiftOnly[bolVec, 3:6]
 # }
-# R places hidden "" around the header names. The following
+# if (length(REMcRdy) == 8) {
-# is intended to remove those quote so that the "" do not blow up the Java REMc.
+#   Vec1 <- abs(REMcRdy[, 3]) >= sd
-# Use ,quote=F in the write.csv statement to fix R output file.
+#   Vec2 <- abs(REMcRdy[, 4]) >= sd
-# write.csv(combI,file.path(out_dir,"CombinedKLzscores.csv"), row.names = FALSE)
+#   Vec3 <- abs(REMcRdy[, 5]) >= sd
-write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
+#   Vec4 <- abs(REMcRdy[, 6]) >= sd
-write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
+#   Vec5 <- abs(REMcRdy[, 7]) >= sd
-# LabelStd <- read.table(file="./parameters.csv",stringsAsFactors = FALSE,sep = ",")
+#   Vec6 <- abs(REMcRdy[, 8]) >= sd
 #   bolVec <- Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6
 #   REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
 #   REMcRdyGT2[, 3:8] <- REMcRdy[bolVec, 3:8]
 #   shiftOnlyGT2 <- shiftOnly[bolVec, 1:2]
 #   shiftOnlyGT2[, 3:8] <- shiftOnly[bolVec, 3:8]
 # }
-LabelStd <- read.csv(file = study_info, stringsAsFactors = FALSE)
+# if (length(REMcRdy) == 10) {
-# print(sd)
+#   Vec1 <- abs(REMcRdy[, 3]) >= sd
-LabelStd[, 4] <- as.numeric(sd)
+#   Vec2 <- abs(REMcRdy[, 4]) >= sd
-write.csv(LabelStd, file = file.path(out_dir, "parameters.csv"), row.names = FALSE)
+#   Vec3 <- abs(REMcRdy[, 5]) >= sd
-write.csv(LabelStd, file = study_info, row.names = FALSE)
+#   Vec4 <- abs(REMcRdy[, 6]) >= sd
 #   Vec5 <- abs(REMcRdy[, 7]) >= sd
 #   Vec6 <- abs(REMcRdy[, 8]) >= sd
 #   Vec7 <- abs(REMcRdy[, 9]) >= sd
 #   Vec8 <- abs(REMcRdy[, 10]) >= sd
 #   bolVec <- Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6 | Vec7 | Vec8
 #   REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
 #   REMcRdyGT2[, 3:10] <- REMcRdy[bolVec, 3:10]
 #   shiftOnlyGT2 <- shiftOnly[bolVec, 1:2]
 #   shiftOnlyGT2[, 3:10] <- shiftOnly[bolVec, 3:10]
 # }
 # if (sd != 0) {
 #   REMcRdy <- REMcRdyGT2  # [,2:length(REMcRdyGT2)]
 #   shiftOnly <- shiftOnlyGT2  # [,2:length(shiftOnlyGT2)]
 # }
 # if (sd == 0) {
 #   REMcRdy <- REMcRdy # [,2:length(REMcRdy)]
 #   shiftOnly <- shiftOnly # [,2:length(shiftOnly)]
 # }
 # # R places hidden "" around the header names. The following
 # # is intended to remove those quote so that the "" do not blow up the Java REMc.
 # # Use ,quote=F in the write.csv statement to fix R output file.
 # # write.csv(combI,file.path(out_dir,"CombinedKLzscores.csv"), row.names = FALSE)
 # write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
 # write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
 # # LabelStd <- read.table(file="./parameters.csv",stringsAsFactors = FALSE,sep = ",")
 # LabelStd <- read.csv(file = study_info, stringsAsFactors = FALSE)
 # # print(sd)
 # LabelStd[, 4] <- as.numeric(sd)
 # write.csv(LabelStd, file = file.path(out_dir, "parameters.csv"), row.names = FALSE)
 # write.csv(LabelStd, file = study_info, row.names = FALSE)
--- a/workflow/qhtcp-workflow
+++ b/workflow/qhtcp-workflow