Add new configuration file

This commit is contained in:
2024-08-22 04:20:40 -04:00
parent a300484ceb
commit bcc8dba3e4
8 changed files with 1824 additions and 1016 deletions

View File

@@ -0,0 +1,121 @@
#!/usr/bin/env/bash
# @description Creates, modifies, and parses the study info file
#
# TODO
#
# * Needs refactoring
# * Ended up combining a few functions into one
#
#
# @exitcode 0 If one or more studies found
# @exitcode 1 If no studies found
# @set STUDIES array contains array of "exp# sd ExpDir"
parse_study_info() {
debug "Running: ${FUNCNAME[0]}"
# Only run this once per project
# in case we run multiple modules
(( SET_STUDIES )) && return 0
declare -g SET_STUDIES=1
# Use initials from project or whoami?
# Best I can do is first two letters of username
# See TODO in markdown
initials="${USER:0:2}"
INITIALS=${initials^^}
empty_study=1
# Find an Exp directory that does not exist
while [[ -d $STUDY_RESULTS_DIR/exp$empty_study ]]; do
(( empty_study++ ))
done
next_study_entry="$empty_study,$PROJECT_NAME,NA,NA,$INITIALS"
echo "${underline}Study Info File${nounderline}"
if [[ -f $STUDY_INFO_FILE ]]; then
# Get latest entry
while IFS=',' read -r col1 _; do # split on comma, get Exp # from 1st column
studies_nums+=("$col1")
done < <(tail -n +2 "$STUDY_INFO_FILE")
largest=${studies_nums[0]}
for i in "${studies_nums[@]}"; do
if ((i > largest)); then
largest=$i
fi
done
empty_study=$((largest+1))
next_study_entry="$((empty_study)),$PROJECT_NAME,NA,NA,$INITIALS"
else # create a default study info file
echo "ExpNumb,ExpLabel,BackgroundSD,ZscoreJoinSD,AnalysisBy" > "$STUDY_INFO_FILE"
echo "$next_study_entry" >> "$STUDY_INFO_FILE"
next_study_entry="$((empty_study+1)),$PROJECT_NAME,NA,NA,$INITIALS"
fi
# Print current studies
cat <<-EOF
* Give each experiment labels to be used for the plots and specific files.
* Enter the desired experiment names in the order they should appear in the REMc heatmaps
Current study info file contents:
${underline}$STUDY_INFO_FILE${nounderline}
$(cat "$STUDY_INFO_FILE")
EOF
# Allow user to add/edit the study info file
if ! ((YES)); then
for ((i=1; i<2; i++)); do
cat <<-EOF
Next entry suggestion: "$next_study_entry"
Would you like to:
* (a)dd the suggested entry
* (e)dit the study info file manually
* (c)ontinue (default)
EOF
read -r -p "(c): " response
echo ""
[[ -z $response ]] && break
case $response in
a)
echo "Adding auto-entry suggestion to $STUDY_INFO_FILE"
echo "$next_study_entry" >> "$STUDY_INFO_FILE"
next_study_entry="$((empty_study+1)),$PROJECT_NAME,NA,NA,$INITIALS"
i=0
;;
e)
debug "$EDITOR $STUDY_INFO_FILE"
"$EDITOR" "$STUDY_INFO_FILE"
;;
c)
break
;;
*)
err "Invalid response, please try again"
i=0
;;
esac
break
done
fi
# Read study info file
declare -ga STUDIES
while IFS=',' read -r num _ sd _; do
STUDIES+=("$num $sd $STUDY_RESULTS_DIR/exp$num")
done < <(tail -n +2 "$STUDY_INFO_FILE") # skip header
# Initialize missing Exp dirs
for study in "${STUDIES[@]}"; do
read -r _ _ dir <<< "$study"
[[ -d $dir ]] || execute mkdir "$dir"
done
((DEBUG)) && declare -p STUDIES
# Return true if at least one study was found
[[ ${#STUDIES[@]} -gt 0 ]]
}

View File

@@ -0,0 +1,125 @@
import pandas as pd
import os
import sys
import numpy as np
# Function to parse and set arguments
def parse_arguments():
if len(sys.argv) == 1: # Interactive mode
args = [
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD",
2,
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/StudyInfo.csv",
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/Exp1",
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/Exp2"
]
else:
args = sys.argv[1:]
return {
"out_dir": os.path.abspath(args[0]),
"sd": float(args[1]),
"study_info": os.path.abspath(args[2]),
"input_dirs": args[3:]
}
args = parse_arguments()
# Create an array for the zscores files
def get_zscores_files(dirs):
files = [os.path.join(study, "zscores", "zscores_interaction.csv")
for study in dirs if os.path.exists(os.path.join(study, "zscores", "zscores_interaction.csv"))]
return files
zscores_files = get_zscores_files(args['input_dirs'])
print(f"The SD value is: {args['sd']}")
# Ensure there are enough files to compare
if len(zscores_files) < 2:
sys.exit("Not enough experiments to compare, exiting script")
# Function to join zscores files
def join_zscores_files(files):
joined_data = pd.read_csv(files[0])
for file in files[1:]:
temp_data = pd.read_csv(file)
joined_data = pd.merge(joined_data, temp_data, on="OrfRep", how="outer")
return joined_data
# Load and join zscores files
joined_data = join_zscores_files(zscores_files)
# Order and select columns
def order_and_select_columns(data):
ordered_data = data[sorted(data.columns)]
selected_headers = ordered_data.filter(regex="OrfRep|Gene|z_lm_k|z_shift_k|z_lm_l|z_shift_l")
return selected_headers
selected_headers = order_and_select_columns(joined_data)
# Remove redundant columns like "Gene.1"
def clean_headers(data, suffixes):
suffixes_to_remove = [f"Gene.{i}" for i in range(1, suffixes+1)]
return data.drop(columns=suffixes_to_remove, errors='ignore')
headSel = clean_headers(selected_headers, len(zscores_files) - 1)
headSel2 = clean_headers(joined_data.filter(regex="OrfRep|Gene"), len(zscores_files) - 1)
# Fill NA values in Shift and Z_lm columns
def fill_na_in_columns(data):
for column in data.columns:
if "Shift" in column:
data[column].fillna(0.001, inplace=True)
elif "Z_lm_" in column:
data[column].fillna(0.0001, inplace=True)
return data
headSel = fill_na_in_columns(headSel)
# Filter based on standard deviation
def filter_by_sd(data, sd):
if sd == 0:
return data
z_lm_cols = data.filter(regex="z_lm_")
filter_vector = z_lm_cols.abs().ge(sd).any(axis=1)
return data[filter_vector]
REMcRdy = filter_by_sd(headSel.filter(regex="OrfRep|Gene|z_lm_"), args['sd'])
shiftOnly = filter_by_sd(headSel.filter(regex="OrfRep|Gene|z_shift"), args['sd'])
# Reorder columns to interleave Z_lm and Shift data
def reorder_columns(data1, data2):
combined_data = data1.copy()
for i in range(2, data1.shape[1]):
combined_data.insert(2 * i - 1, data2.columns[i], data2.iloc[:, i])
return combined_data
combI = reorder_columns(headSel2, shiftOnly)
# Write output files
REMcRdy.to_csv(os.path.join(args['out_dir'], "REMcRdy_lm_only.csv"), index=False, quotechar=False)
shiftOnly.to_csv(os.path.join(args['out_dir'], "Shift_only.csv"), index=False, quotechar=False)
# Relabel headers using experiment names from StudyInfo.csv
def relabel_headers(headers, labels):
new_labels = headers.copy()
for i, header in enumerate(headers):
suffix = header.split('.')[-1]
if suffix.isdigit() and int(suffix) in range(1, 4):
exp_name = labels.iloc[int(suffix) - 1, 1]
new_labels[i] = header.replace(f".{suffix}", f"_{exp_name}")
return new_labels
LabelStd = pd.read_csv(args['study_info'])
shiftOnly.columns = relabel_headers(shiftOnly.columns, LabelStd)
REMcRdy.columns = relabel_headers(REMcRdy.columns, LabelStd)
# Save relabeled files
REMcRdy.to_csv(os.path.join(args['out_dir'], "REMcRdy_lm_only.csv"), index=False, quotechar=False)
shiftOnly.to_csv(os.path.join(args['out_dir'], "Shift_only.csv"), index=False, quotechar=False)
# Save updated parameters
LabelStd.iloc[:, 3] = args['sd']
LabelStd.to_csv(os.path.join(args['out_dir'], "parameters.csv"), index=False)
LabelStd.to_csv(args['study_info'], index=False)

View File

@@ -1,12 +1,4 @@
#!/usr/bin/env Rscript
# This R script performs GTA L and K Pairwise Compares for user specified pairs of Experiments
#
# Updated 240724 Bryan C Roessler to improve file operations and portability
# NOTE: The two required arguments are the same and now there are two optional arguments
# 1. Exp1
# 2. Exp2
# 3. StudyInfo.csv file
# 4. Output Directory
library("ggplot2")
library("plotly")
@@ -16,31 +8,14 @@ library("grid")
library("ggthemes")
args <- commandArgs(TRUE)
exp_name <- args[1]
exp_name2 <- args[2]
exp1_name <- args[1]
exp1_file <- args[2]
exp2_name <- args[3]
exp2_file <- args[4]
output_dir <- args[5]
if (length(args) >= 3) {
study_info_file <- args[3]
} else {
study_info_file <- "StudyInfo.csv"
}
if (length(args) >= 4) {
output_dir <- args[4]
} else {
output_dir <- "gta"
}
expNumber1 <- as.numeric(sub("^.*?(\\d+)$", "\\1", exp_name))
expNumber2 <- as.numeric(sub("^.*?(\\d+)$", "\\1", exp_name2))
Labels <- read.csv(file = study_info_file, stringsAsFactors = FALSE)
Name1 <- Labels[expNumber1, 2]
Name2 <- Labels[expNumber2, 2]
go_terms_file <- "Average_GOTerms_All.csv"
input_file1 <- file.path(output_dir, exp_name, go_terms_file)
input_file2 <- file.path(output_dir, exp_name2, go_terms_file)
pairDirL <- file.path(output_dir, paste0("PairwiseCompareL_", exp_name, "-", exp_name2))
pairDirK <- file.path(output_dir, paste0("PairwiseCompareK_", exp_name, "-", exp_name2))
pairDirL <- file.path(output_dir, paste0("PairwiseCompareL_", exp1_name, "-", exp2_name))
pairDirK <- file.path(output_dir, paste0("PairwiseCompareK_", exp1_name, "-", exp2_name))
# Pairwise L
# outputPlotly <- "../GTAresults/PairwiseCompareL/" #"/GTAresults/PairwiseCompareL/"
@@ -75,21 +50,6 @@ theme_Publication <- function(base_size = 14, base_family = "sans") {
)
}
scale_fill_Publication <- function(...) {
library(scales)
discrete_scale("fill", "Publication", manual_pal(
values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
"#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...
)
}
scale_colour_Publication <- function(...) {
discrete_scale("colour", "Publication", manual_pal(
values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
"#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...
)
}
theme_Publication_legend_right <- function(base_size = 14, base_family = "sans") {
(theme_foundation(base_size = base_size, base_family = base_family) +
theme(
@@ -131,8 +91,8 @@ scale_colour_Publication <- function(...) {
"#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...)
}
X1 <- read.csv(file = input_file1, stringsAsFactors = FALSE, header = TRUE)
X2 <- read.csv(file = input_file2, stringsAsFactors = FALSE, header = TRUE)
X1 <- read.csv(file = exp1_file, stringsAsFactors = FALSE, header = TRUE)
X2 <- read.csv(file = exp2_file, stringsAsFactors = FALSE, header = TRUE)
X <- merge(X1, X2, by = "Term_Avg", all = TRUE, suffixes = c("_X1", "_X2"))
gg <- ggplot(data = X, aes(
@@ -146,7 +106,7 @@ gg <- ggplot(data = X, aes(
SD_1 = Z_lm_L_SD_X1,
SD_2 = Z_lm_L_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", Name1)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_rect(
aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2),
color = "grey20",
@@ -156,12 +116,12 @@ gg <- ggplot(data = X, aes(
fill = NA
) +
geom_point(shape = 3) +
ylab(paste("GO Term Avg lm Z for", Name2)) +
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, " vs. ", Name2) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, " vs. ", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOntology.pdf")),
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOntology.pdf")),
width = 12,
height = 8
)
@@ -169,7 +129,7 @@ pdf(
gg
dev.off()
pgg <- ggplotly(gg)
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_byOntology.html"))
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOntology.html"))
htmlwidgets::saveWidget(pgg, fname)
# ID aggravators and alleviators, regardless of whether they meet 2SD threshold
@@ -184,19 +144,19 @@ X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm
X$Overlap_Avg <- NA
try(X[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <-
paste(Name1, "Specific_Deletion_Enhancers", sep = "_"))
paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"))
try(X[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <-
paste(Name1, "Specific_Deletion_Suppresors", sep = "_"))
paste(exp1_name, "Specific_Deletion_Suppresors", sep = "_"))
try(X[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <-
paste(Name2, "Specific_Deletion_Enhancers", sep = "_"))
paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <-
paste(Name2, "Specific_Deletion_Suppressors", sep = "_"))
paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"))
try(X[X$Term_Avg %in% Overlap_Aggravators$Term_Avg, ]$Overlap_Avg <- "Overlapping_Deletion_Enhancers")
try(X[X$Term_Avg %in% Overlap_Alleviators$Term_Avg, ]$Overlap_Avg <- "Overlapping_Deletion_Suppressors")
try(X[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg, ]$Overlap_Avg <-
paste(Name2, "Deletion_Enhancers", Name1, "Deletion_Suppressors", sep = "_"))
paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg, ]$Overlap_Avg <-
paste(Name2, "Deletion_Suppressors", Name1, "Deletion_Enhancers", sep = "_"))
paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_"))
gg <- ggplot(data = X, aes(
x = Z_lm_L_Avg_X1,
@@ -209,7 +169,7 @@ gg <- ggplot(data = X, aes(
SD_1 = Z_lm_L_SD_X1,
SD_2 = Z_lm_L_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", Name1)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_rect(
aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2),
color = "grey20",
@@ -219,12 +179,12 @@ gg <- ggplot(data = X, aes(
fill = NA
) +
geom_point(shape = 3) +
ylab(paste("GO Term Avg lm Z for", Name2)) +
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, " vs. ", Name2)) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, " vs. ", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap.pdf")),
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap.pdf")),
width = 12,
height = 8
)
@@ -232,7 +192,7 @@ pdf(
gg
dev.off()
pgg <- ggplotly(gg)
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_byOverlap.html"))
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap.html"))
htmlwidgets::saveWidget(pgg, fname)
x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2, ]
@@ -249,15 +209,15 @@ gg <- ggplot(data = x_rem2_gene, aes(
SD_1 = Z_lm_L_SD_X1,
SD_2 = Z_lm_L_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", Name1)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
geom_point(shape = 3) +
ylab(paste("GO Term Avg lm Z for", Name2)) +
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_above2genes.pdf")),
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_above2genes.pdf")),
width = 12,
height = 8
)
@@ -266,7 +226,7 @@ gg
dev.off()
pgg <- ggplotly(gg)
#pgg
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_byOverlap_above2genes.html"))
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap_above2genes.html"))
htmlwidgets::saveWidget(pgg, fname)
#4
@@ -282,15 +242,15 @@ gg <- ggplot(data = X_overlap_nothresold, aes(
SD_1 = Z_lm_L_SD_X1,
SD_2 = Z_lm_L_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", Name1)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
geom_point(shape = 3) +
ylab(paste("GO Term Avg lm Z for", Name2)) +
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_Above2SD_ByOverlap.pdf")),
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_Above2SD_ByOverlap.pdf")),
width = 12,
height = 8
)
@@ -299,7 +259,7 @@ gg
dev.off()
pgg <- ggplotly(gg)
#pgg
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_Above2SD_ByOverlap.html"))
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_Above2SD_ByOverlap.html"))
htmlwidgets::saveWidget(pgg, fname)
# Only output GTA terms where average score is still above 2 after subtracting the SD
@@ -320,16 +280,16 @@ X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 &
X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2), ]
X$Overlap <- NA
try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg, ]$Overlap <- paste(Name1, "Specific_Deletion_Enhancers", sep = "_"))
try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg, ]$Overlap <- paste(Name1, "Specific_Deletion_Suppresors", sep = "_"))
try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg, ]$Overlap <- paste(Name2, "Specific_Deletion_Enhancers", sep = "_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg, ]$Overlap <- paste(Name2, "Specific_Deletion_Suppressors", sep = "_"))
try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg, ]$Overlap <- paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"))
try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg, ]$Overlap <- paste(exp1_name, "Specific_Deletion_Suppresors", sep = "_"))
try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg, ]$Overlap <- paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg, ]$Overlap <- paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"))
try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg, ]$Overlap <- "Overlapping_Deletion_Enhancers")
try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg, ]$Overlap <- "Overlapping_Deletion_Suppressors")
try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg, ]$Overlap <-
paste(Name2, "Deletion_Enhancers", Name1, "Deletion_Suppressors", sep = "_"))
paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg, ]$Overlap <-
paste(Name2, "Deletion_Suppressors", Name1, "Deletion_Enhancers", sep = "_"))
paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_"))
#5
X_abovethreshold <- X[!(is.na(X$Overlap)), ]
@@ -345,15 +305,15 @@ gg <- ggplot(data = X_abovethreshold, aes(
SD_1 = Z_lm_L_SD_X1,
SD_2 = Z_lm_L_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", Name1)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
geom_point(shape = 3) +
ylab(paste("GO Term Avg lm Z for", Name2)) +
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, " vs. ", Name2)) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, " vs. ", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold.pdf")),
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold.pdf")),
width = 12,
height = 8
)
@@ -362,7 +322,7 @@ gg
dev.off()
pgg <- ggplotly(gg)
#pgg
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold.html"))
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold.html"))
htmlwidgets::saveWidget(pgg, fname)
#6
@@ -377,16 +337,16 @@ gg <- ggplot(data = X_abovethreshold, aes(
SD_1 = Z_lm_L_SD_X1,
SD_2 = Z_lm_L_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", Name1)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_text(aes(label = Term_Avg), nudge_y = 0.25, size = 2) +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
geom_point(shape = 3, size = 3) +
ylab(paste("GO Term Avg lm Z for", Name2)) +
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_names.pdf")),
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_names.pdf")),
width = 20,
height = 20
)
@@ -395,7 +355,7 @@ gg
dev.off()
pgg <- ggplotly(gg)
#pgg
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_names.html"))
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_names.html"))
htmlwidgets::saveWidget(pgg, fname)
X_abovethreshold$X1_Rank <- NA
@@ -415,16 +375,16 @@ gg <- ggplot(data = X_abovethreshold, aes(
SD_1 = Z_lm_L_SD_X1,
SD_2 = Z_lm_L_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", Name1)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_text(aes(label = X1_Rank), nudge_y = 0.25, size = 4) +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
geom_point(shape = 3, size = 3) +
ylab(paste("GO Term Avg lm Z for", Name2)) +
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX1.pdf")),
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX1.pdf")),
width = 15,
height = 15
)
@@ -435,7 +395,7 @@ pgg <- ggplotly(gg)
#pgg
fname <-
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX1.html"))
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX1.html"))
htmlwidgets::saveWidget(pgg, fname)
#8
@@ -450,16 +410,16 @@ gg <- ggplot(data = X_abovethreshold, aes(
SD_1 = Z_lm_L_SD_X1,
SD_2 = Z_lm_L_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", Name1)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_text(aes(label = X2_Rank), nudge_y = 0.25, size = 4) +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
geom_point(shape = 3, size = 3) +
ylab(paste("GO Term Avg lm Z for", Name2)) +
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX2.pdf")),
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX2.pdf")),
width = 15,
height = 15
)
@@ -469,18 +429,18 @@ dev.off()
pgg <- ggplotly(gg)
#pgg
fname <-
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX2.html"))
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX2.html"))
htmlwidgets::saveWidget(pgg, fname)
write.csv(
x = X,
file.path(pairDirL, paste0("All_GTA_Avg_Scores_", Name1, "_vs_", Name2, ".csv")),
file.path(pairDirL, paste0("All_GTA_Avg_Scores_", exp1_name, "_vs_", exp2_name, ".csv")),
row.names = FALSE
)
write.csv(
x = X_abovethreshold,
file = file.path(pairDirL, paste0("AboveThreshold_GTA_Avg_Scores_", Name1, "_vs_", Name2, ".csv")),
file = file.path(pairDirL, paste0("AboveThreshold_GTA_Avg_Scores_", exp1_name, "_vs_", exp2_name, ".csv")),
row.names = FALSE
)
@@ -582,8 +542,8 @@ scale_colour_Publication <- function(...) {
)
}
X1 <- read.csv(file = input_file1, stringsAsFactors = FALSE, header = TRUE)
X2 <- read.csv(file = input_file2, stringsAsFactors = FALSE, header = TRUE)
X1 <- read.csv(file = exp1_file, stringsAsFactors = FALSE, header = TRUE)
X2 <- read.csv(file = exp2_file, stringsAsFactors = FALSE, header = TRUE)
#1
X <- merge(X1, X2, by = "Term_Avg", all = TRUE, suffixes = c("_X1", "_X2"))
@@ -599,15 +559,15 @@ gg <- ggplot(data = X, aes(
SD_1 = Z_lm_K_SD_X1,
SD_2 = Z_lm_K_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", Name1)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
geom_point(shape = 3) +
ylab(paste("GO Term Avg lm Z for", Name2)) +
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOntology.pdf")),
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOntology.pdf")),
width = 12,
height = 8
)
@@ -616,7 +576,7 @@ gg
dev.off()
pgg <- ggplotly(gg)
#pgg
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_byOntology.html"))
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOntology.html"))
htmlwidgets::saveWidget(pgg, fname)
#2
@@ -632,23 +592,23 @@ X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_K_Avg_X2 <= -2 & X$Z_lm
X$Overlap_Avg <- NA
try(X[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <-
paste(Name1, "Specific_Deletion_Suppressors", sep = "_"))
paste(exp1_name, "Specific_Deletion_Suppressors", sep = "_"))
try(X[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <-
paste(Name1, "Specific_Deletion_Enhancers", sep = "_"))
paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"))
try(X[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <-
paste(Name2, "Specific_Deletion_Suppressors", sep = "_"))
paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <-
paste(Name2, "Specific_Deletion_Enhancers", sep = "_"))
paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"))
try(X[X$Term_Avg %in% Overlap_Aggravators$Term_Avg, ]$Overlap_Avg <-
"Overlapping_Deletion_Suppressors")
try(X[X$Term_Avg %in% Overlap_Alleviators$Term_Avg, ]$Overlap_Avg <-
"Overlapping_Deletion_Enhancers")
try(X[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg, ]$Overlap_Avg <-
paste(Name2, "Deletion_Suppressors", Name1, "Deletion_Enhancers", sep = "_"))
paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg, ]$Overlap_Avg <-
paste(Name2, "Deletion_Enhancers", Name1, "Deletion_Suppressors", sep = "_"))
paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"))
plotly_path <- file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_byOverlap.html"))
plotly_path <- file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap.html"))
gg <- ggplot(data = X, aes(
x = Z_lm_K_Avg_X1,
y = Z_lm_K_Avg_X2,
@@ -660,15 +620,15 @@ gg <- ggplot(data = X, aes(
SD_1 = Z_lm_K_SD_X1,
SD_2 = Z_lm_K_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", Name1)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
geom_point(shape = 3) +
ylab(paste("GO Term Avg lm Z for", Name2)) +
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap.pdf")),
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap.pdf")),
width = 12,
height = 8
)
@@ -678,12 +638,12 @@ dev.off()
pgg <- ggplotly(gg)
#2
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_byOverlap.html"))
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap.html"))
htmlwidgets::saveWidget(pgg, fname)
#3
x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2, ]
plotly_path <- file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_byOverlap_above2genes.html"))
plotly_path <- file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap_above2genes.html"))
gg <- ggplot(data = x_rem2_gene, aes(
x = Z_lm_K_Avg_X1,
y = Z_lm_K_Avg_X2,
@@ -695,15 +655,15 @@ gg <- ggplot(data = x_rem2_gene, aes(
SD_1 = Z_lm_K_SD_X1,
SD_2 = Z_lm_K_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", Name1)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
geom_point(shape = 3) +
ylab(paste("GO Term Avg lm Z for", Name2)) +
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_above2genes.pdf")),
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_above2genes.pdf")),
width = 12,
height = 8
)
@@ -712,7 +672,7 @@ gg
dev.off()
pgg <- ggplotly(gg)
#pgg
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_byOverlap_above2genes.html"))
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap_above2genes.html"))
htmlwidgets::saveWidget(pgg, fname)
#4
@@ -728,15 +688,15 @@ gg <- ggplot(data = X_overlap_nothresold, aes(
SD_1 = Z_lm_K_SD_X1,
SD_2 = Z_lm_K_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", Name1)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
geom_point(shape = 3) +
ylab(paste("GO Term Avg lm Z for", Name2)) +
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_Above2SD_ByOverlap.pdf")),
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_Above2SD_ByOverlap.pdf")),
width = 12,
height = 8
)
@@ -746,7 +706,7 @@ dev.off()
pgg <- ggplotly(gg)
#pgg
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_Above2SD_ByOverlap.html"))
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_Above2SD_ByOverlap.html"))
htmlwidgets::saveWidget(pgg, fname)
#5
@@ -769,21 +729,21 @@ X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 &
X$Overlap <- NA
try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg, ]$Overlap <-
paste(Name1, "Specific_Deletion_Suppressors", sep = "_"))
paste(exp1_name, "Specific_Deletion_Suppressors", sep = "_"))
try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg, ]$Overlap <-
paste(Name1, "Specific_Deletion_Enhancers", sep = "_"))
paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"))
try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg, ]$Overlap <-
paste(Name2, "Specific_Deletion_Suppressors", sep = "_"))
paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg, ]$Overlap <-
paste(Name2, "Specific_Deletion_Enhancers", sep = "_"))
paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"))
try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg, ]$Overlap <-
"Overlapping_Deletion_Suppressors")
try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg, ]$Overlap <-
"Overlapping_Deletion_Enhancers")
try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg, ]$Overlap <-
paste(Name2, "Deletion_Suppressors", Name1, "Deletion_Enhancers", sep = "_"))
paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_"))
try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg, ]$Overlap <-
paste(Name2, "Deletion_Enhancers", Name1, "Deletion_Suppressors", sep = "_"))
paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"))
X_abovethreshold <- X[!(is.na(X$Overlap)), ]
gg <- ggplot(data = X_abovethreshold, aes(
@@ -797,15 +757,15 @@ gg <- ggplot(data = X_abovethreshold, aes(
SD_1 = Z_lm_K_SD_X1,
SD_2 = Z_lm_K_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", Name1)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
geom_point(shape = 3) +
ylab(paste("GO Term Avg lm Z for", Name2)) +
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold.pdf")),
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold.pdf")),
width = 12,
height = 8
)
@@ -814,7 +774,7 @@ gg
dev.off()
pgg <- ggplotly(gg)
#pgg
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold.html"))
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold.html"))
htmlwidgets::saveWidget(pgg, fname)
#6
@@ -829,16 +789,16 @@ gg <- ggplot(data = X_abovethreshold, aes(
SD_1 = Z_lm_K_SD_X1,
SD_2 = Z_lm_K_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", Name1)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_text(aes(label = Term_Avg), nudge_y = 0.25, size = 2) +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
geom_point(shape = 3, size = 3) +
ylab(paste("GO Term Avg lm Z for", Name2)) +
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, " vs. ", Name2)) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, " vs. ", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_names.pdf")),
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_names.pdf")),
width = 20,
height = 20
)
@@ -846,7 +806,7 @@ gg
dev.off()
pgg <- ggplotly(gg)
#pgg
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_names.html"))
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_names.html"))
htmlwidgets::saveWidget(pgg, fname)
#7
@@ -866,16 +826,16 @@ gg <- ggplot(data = X_abovethreshold, aes(
SD_1 = Z_lm_K_SD_X1,
SD_2 = Z_lm_K_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", Name1)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_text(aes(label = X1_Rank), nudge_y = 0.25, size = 4) +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
geom_point(shape = 3, size = 3) +
ylab(paste("GO Term Avg lm Z for", Name2)) +
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX1.pdf")),
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX1.pdf")),
width = 15,
height = 15
)
@@ -885,7 +845,7 @@ dev.off()
pgg <- ggplotly(gg)
#pgg
fname <-
file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX1.html"))
file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX1.html"))
htmlwidgets::saveWidget(pgg, fname)
#8
@@ -900,16 +860,16 @@ gg <- ggplot(data = X_abovethreshold, aes(
SD_1 = Z_lm_K_SD_X1,
SD_2 = Z_lm_K_SD_X2
)) +
xlab(paste("GO Term Avg lm Z for", Name1)) +
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
geom_text(aes(label = X2_Rank), nudge_y = 0.25, size = 4) +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
geom_point(shape = 3, size = 3) +
ylab(paste("GO Term Avg lm Z for", Name2)) +
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
theme_Publication_legend_right()
pdf(
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX2.pdf")),
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX2.pdf")),
width = 15,
height = 15
)
@@ -919,17 +879,17 @@ dev.off()
pgg <- ggplotly(gg)
#pgg
fname <-
file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX2.html"))
file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX2.html"))
htmlwidgets::saveWidget(pgg, fname)
write.csv(
x = X,
file = file.path(pairDirK, paste0("All_GTF_Avg_Scores_", Name1, "_vs_", Name2, ".csv")),
file = file.path(pairDirK, paste0("All_GTF_Avg_Scores_", exp1_name, "_vs_", exp2_name, ".csv")),
row.names = FALSE
)
write.csv(
x = X_abovethreshold,
file = file.path(pairDirK, paste0("AboveThreshold_GTF_Avg_Scores_", Name1, "_vs_", Name2, ".csv")),
file = file.path(pairDirK, paste0("AboveThreshold_GTF_Avg_Scores_", exp1_name, "_vs_", exp2_name, ".csv")),
row.names = FALSE
)

View File

@@ -0,0 +1,335 @@
suppressMessages({
if (!require("ggplot2")) stop("Package ggplot2 is required but not installed.")
if (!require("plotly")) stop("Package plotly is required but not installed.")
if (!require("htmlwidgets")) stop("Package htmlwidgets is required but not installed.")
if (!require("dplyr")) stop("Package dplyr is required but not installed.")
if (!require("ggthemes")) stop("Package ggthemes is required but not installed.")
if (!require("plyr")) stop("Package plyr is required but not installed.")
})
# Constants for configuration
PLOT_WIDTH <- 14
PLOT_HEIGHT <- 9
BASE_SIZE <- 14
options(warn = 2, max.print = 100)
parse_arguments <- function() {
args <- if (interactive()) {
c(
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240116_jhartman2_DoxoHLD",
3,
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/apps/r/SGD_features.tab",
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/easy/20240116_jhartman2_DoxoHLD/results_std.txt",
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp1",
"Experiment 1: Doxo versus HLD",
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp2",
"Experiment 2: HLD versus Doxo",
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp3",
"Experiment 3: HLD versus WT",
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp4",
"Experiment 4: Doxo versus WT"
)
} else {
commandArgs(trailingOnly = TRUE)
}
paths <- normalizePath(file.path(args[seq(5, length(args), by = 2)]), mustWork = FALSE)
names <- args[seq(6, length(args), by = 2)]
experiments <- setNames(paths, names)
list(
out_dir = normalizePath(file.path(args[1]), mustWork = FALSE),
sd = as.numeric(args[2]),
sgd_gene_list = normalizePath(file.path(args[3]), mustWork = FALSE),
easy_results_file = normalizePath(file.path(args[4]), mustWork = FALSE),
experiments = experiments
)
}
args <- parse_arguments()
dir.create(args$out_dir, showWarnings = FALSE)
# Define themes and scales
theme_publication <- function(base_size = BASE_SIZE, base_family = "sans") {
theme_foundation(base_size = base_size, base_family = base_family) +
theme(
plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
text = element_text(),
panel.background = element_rect(colour = NA),
plot.background = element_rect(colour = NA),
panel.border = element_rect(colour = NA),
axis.title = element_text(face = "bold", size = rel(1)),
axis.title.y = element_text(angle = 90, vjust = 2),
axis.title.x = element_text(vjust = -0.2),
axis.line = element_line(colour = "black"),
panel.grid.major = element_line(colour = "#f0f0f0"),
panel.grid.minor = element_blank(),
legend.key = element_rect(colour = NA),
legend.position = "bottom",
legend.direction = "horizontal",
plot.margin = unit(c(10, 5, 5, 5), "mm"),
strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
strip.text = element_text(face = "bold")
)
}
theme_publication_legend_right <- function(base_size = BASE_SIZE, base_family = "sans") {
theme_publication(base_size, base_family) +
theme(
legend.position = "right",
legend.direction = "vertical",
legend.key.size = unit(0.5, "cm"),
legend.spacing = unit(0, "cm"),
legend.title = element_text(face = "italic")
)
}
scale_fill_publication <- function(...) {
discrete_scale("fill", "Publication", manual_pal(values = c(
"#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
"#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
)), ...)
}
scale_colour_publication <- function(...) {
discrete_scale("colour", "Publication", manual_pal(values = c(
"#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
"#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
)), ...)
}
# Load SGD gene list
sgd_genes <- function(sgd_gene_list) {
read.delim(file = sgd_gene_list, quote = "", header = FALSE,
colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11))) %>%
dplyr::rename(ORF = V4, GeneName = V5)
}
genes <- sgd_genes(args$sgd_gene_list)
load_and_preprocess_data <- function(easy_results_file, genes) {
df <- tryCatch({
read.delim(easy_results_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE)
}, error = function(e) {
stop("Error reading file: ", easy_results_file, "\n", e$message)
}) %>%
filter(!.[[1]] %in% c("", "Scan")) # Fixed syntax
numeric_columns <- c("Col", "Row", "l", "K", "r", "Scan", "AUC96", "LstBackgrd", "X1stBackgrd")
df[numeric_columns[numeric_columns %in% colnames(df)]] <-
lapply(df[numeric_columns[numeric_columns %in% colnames(df)]], as.numeric)
df <- df %>%
mutate(
L = if ("l" %in% colnames(.)) l else {warning("Missing column: l"); NA},
AUC = if ("AUC96" %in% colnames(.)) AUC96 else {warning("Missing column: AUC96"); NA},
conc_num = if ("Conc" %in% colnames(.)) as.numeric(gsub("[^0-9\\.]", "", Conc)) else NA,
delta_bg = if (all(c("X1stBackgrd", "LstBackgrd") %in% colnames(.)))
LstBackgrd - X1stBackgrd else {warning("Missing columns for delta_bg calculation"); NA},
GeneName = vapply(ORF, function(orf) {
gene_name <- genes %>% filter(ORF == orf) %>% pull(GeneName)
ifelse(is.null(gene_name) || gene_name == "" || length(gene_name) == 0, orf, gene_name)
}, character(1)) # Ensures a character vector is returned
)
if (nrow(df) == 0) warning("Dataframe is empty after filtering")
return(df)
}
create_and_publish_plot <- function(df, var, plot_type, out_dir_qc, suffix = "") {
plot_func <- if (plot_type == "scatter") geom_point else geom_boxplot
filtered_df <- filter(df, is.finite(.data[[var]]))
p <- ggplot(filtered_df, aes(Scan, .data[[var]], color = as.factor(conc_num))) +
plot_func(shape = 3, size = 0.2, position = "jitter") +
stat_summary(fun = mean, geom = "point", size = 0.6) +
stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), geom = "errorbar") +
ggtitle(paste("Plate analysis by Drug Conc for", var, "before quality control")) +
theme_publication()
pdf_path <- file.path(out_dir_qc, paste0("plate_analysis_", var, suffix, ".pdf"))
pdf(pdf_path, width = PLOT_WIDTH, height = PLOT_HEIGHT)
print(p)
dev.off()
html_path <- sub(".pdf$", ".html", pdf_path)
pgg <- suppressWarnings(ggplotly(p, tooltip = c("L", "K", "ORF", "Gene", "delta_bg", "GeneName")) %>%
layout(legend = list(orientation = "h")))
saveWidget(pgg, html_path, selfcontained = TRUE)
}
publish_summary_stats <- function(df, variables, out_dir) {
stats_list <- lapply(variables, function(var) {
df %>%
dplyr::group_by(OrfRep, conc_num) %>%
dplyr::summarize(
N = dplyr::n(), # Ensure that the correct version of n() is used
mean_val = mean(.data[[var]], na.rm = TRUE),
sd_val = sd(.data[[var]], na.rm = TRUE),
se_val = sd_val / sqrt(N)
)
})
summary_stats_df <- dplyr::bind_rows(stats_list, .id = "variable")
write.csv(summary_stats_df, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
}
publish_interaction_scores <- function(df, out_dir) {
interaction_scores <- df %>%
group_by(OrfRep) %>%
summarize(
mean_L = mean(L, na.rm = TRUE),
mean_K = mean(K, na.rm = TRUE),
mean_r = mean(r, na.rm = TRUE),
mean_AUC = mean(AUC, na.rm = TRUE),
delta_bg_mean = mean(delta_bg, na.rm = TRUE),
delta_bg_sd = sd(delta_bg, na.rm = TRUE)
) %>%
mutate(
l_rank = rank(mean_L),
k_rank = rank(mean_K),
r_rank = rank(mean_r),
auc_rank = rank(mean_AUC)
)
write.csv(interaction_scores, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE)
write.csv(arrange(interaction_scores, l_rank, k_rank),
file.path(out_dir, "rf_zscores_interaction_ranked.csv"), row.names = FALSE)
}
publish_zscores <- function(df, out_dir) {
zscores <- df %>%
mutate(
zscore_L = scale(L, center = TRUE, scale = TRUE),
zscore_K = scale(K, center = TRUE, scale = TRUE),
zscore_r = scale(r, center = TRUE, scale = TRUE),
zscore_AUC = scale(AUC, center = TRUE, scale = TRUE)
)
write.csv(zscores, file.path(out_dir, "zscores_interaction.csv"), row.names = FALSE)
}
generate_and_publish_qc <- function(df, delta_bg_tolerance, out_dir_qc) {
variables <- c("L", "K", "r", "AUC", "delta_bg")
lapply(variables, create_and_publish_plot, df = df, plot_type = "scatter", out_dir_qc = out_dir_qc)
delta_bg_above_tolerance <- filter(df, delta_bg >= delta_bg_tolerance)
lapply(variables, create_and_publish_plot, df = delta_bg_above_tolerance,
plot_type = "scatter", out_dir_qc = out_dir_qc, suffix = "_above_tolerance")
}
process_exp_dir <- function(exp_dir, exp_name, genes, easy_results_file) {
out_dir <- file.path(exp_dir, "zscores")
out_dir_qc <- file.path(exp_dir, "qc")
dir.create(out_dir, showWarnings = FALSE)
dir.create(out_dir_qc, showWarnings = FALSE)
df <- load_and_preprocess_data(easy_results_file, genes)
delta_bg_tolerance <- mean(df$delta_bg, na.rm = TRUE) + 3 * sd(df$delta_bg, na.rm = TRUE)
generate_and_publish_qc(df, delta_bg_tolerance, out_dir_qc)
variables <- c("L", "K", "r", "AUC", "delta_bg")
publish_summary_stats(df, variables, out_dir)
publish_interaction_scores(df, out_dir)
publish_zscores(df, out_dir)
list(
zscores_file = file.path(out_dir, "zscores_interaction.csv"),
exp_name = exp_name
)
}
processed_experiments <- lapply(names(args$experiments), function(exp_name) {
process_exp_dir(args$experiments[[exp_name]], exp_name, genes, args$easy_results_file)
})
zscores_files <- sapply(processed_experiments, `[[`, "zscores_file")
exp_names <- sapply(processed_experiments, `[[`, "exp_name")
combine_zscores <- function(zscores_files) {
if (length(zscores_files) < 2) stop("Not enough experiments to compare, exiting script")
joined_data <- read.csv(file = zscores_files[1], stringsAsFactors = FALSE)
for (file in zscores_files[-1]) {
temp_data <- read.csv(file = file, stringsAsFactors = FALSE)
joined_data <- plyr::join(joined_data, temp_data, by = "OrfRep", type = "full")
}
joined_data
}
process_combined_zscores <- function(joined_data, sd, out_dir, exp_names) {
ordered_data <- joined_data %>%
select(contains("OrfRep"), matches("Gene"),
contains("z_lm_k"), contains("z_shift_k"),
contains("z_lm_l"), contains("z_shift_l")) %>%
arrange(contains("OrfRep"))
clean_headers <- function(data, suffixes) {
suffixes_to_remove <- paste0("Gene.", seq_len(suffixes))
select(data, -all_of(suffixes_to_remove))
}
headSel <- clean_headers(ordered_data, length(zscores_files) - 1)
headSel2 <- clean_headers(select(joined_data, contains("OrfRep"), matches("Gene")), length(zscores_files) - 1)
fill_na_in_columns <- function(data) {
for (header in colnames(data)) {
if (grepl("Shift", header)) {
data[[header]][is.na(data[[header]])] <- 0.001
} else if (grepl("Z_lm_", header)) {
data[[header]][is.na(data[[header]])] <- 0.0001
}
}
data
}
headSel <- fill_na_in_columns(headSel)
filter_by_sd <- function(data, sd) {
if (sd == 0) return(data)
z_lm_cols <- select(data, contains("z_lm_"))
filter_vector <- rowSums(abs(z_lm_cols) >= sd) > 0
data[filter_vector, ]
}
REMcRdy <- filter_by_sd(select(headSel, contains("OrfRep"), matches("Gene"), contains("z_lm_")), sd)
shiftOnly <- filter_by_sd(select(headSel, contains("OrfRep"), matches("Gene"), contains("z_shift")), sd)
reorder_columns <- function(data1, data2) {
combined_data <- data1
for (i in 3:ncol(data1)) {
combined_data <- cbind(combined_data, data2[i], data1[i])
}
combined_data
}
combI <- reorder_columns(headSel2, shiftOnly)
write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
relabel_headers <- function(headers, exp_names) {
new_labels <- headers
for (i in seq_along(headers)) {
suffix <- sub("^.*\\.(\\d+)$", "\\1", headers[i])
if (suffix %in% seq_along(exp_names)) {
exp_name <- exp_names[as.numeric(suffix)]
new_labels[i] <- gsub(paste0(".", suffix), paste0("_", exp_name), headers[i])
}
}
new_labels
}
colnames(shiftOnly) <- relabel_headers(colnames(shiftOnly), exp_names)
colnames(REMcRdy) <- relabel_headers(colnames(REMcRdy), exp_names)
write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
}
joined_data <- combine_zscores(zscores_files)
process_combined_zscores(joined_data, args$sd, args$out_dir, exp_names)

View File

@@ -0,0 +1,209 @@
suppressMessages({
library("ggplot2")
library("plotly")
library("htmlwidgets")
library("extrafont")
library("grid")
library("ggthemes")
})
# Constants for configuration
PLOT_WIDTH <- 12
PLOT_HEIGHT <- 8
BASE_SIZE <- 14
options(warn = 2, max.print = 100)
# Parse arguments
parse_arguments <- function() {
if (interactive()) {
args <- c(
"Exp1",
"/path/to/exp1_file.csv",
"Exp2",
"/path/to/exp2_file.csv",
"/path/to/output_dir"
)
} else {
args <- commandArgs(trailingOnly = TRUE)
}
list(
exp1_name = args[1],
exp1_file = normalizePath(args[2], mustWork = TRUE),
exp2_name = args[3],
exp2_file = normalizePath(args[4], mustWork = TRUE),
output_dir = normalizePath(args[5], mustWork = FALSE)
)
}
args <- parse_arguments()
# Create output directories if they don't exist
pairDirL <- file.path(args$output_dir, paste0("PairwiseCompareL_", args$exp1_name, "-", args$exp2_name))
pairDirK <- file.path(args$output_dir, paste0("PairwiseCompareK_", args$exp1_name, "-", args$exp2_name))
dir.create(pairDirL, showWarnings = FALSE, recursive = TRUE)
dir.create(pairDirK, showWarnings = FALSE, recursive = TRUE)
# Define themes and scales
theme_publication <- function(base_size = BASE_SIZE, base_family = "sans") {
theme_foundation(base_size = base_size, base_family = base_family) +
theme(
plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
panel.background = element_rect(colour = NA),
plot.background = element_rect(colour = NA),
panel.border = element_rect(colour = NA),
axis.title = element_text(face = "bold", size = rel(1)),
axis.line = element_line(colour = "black"),
panel.grid.major = element_line(colour = "#f0f0f0"),
panel.grid.minor = element_blank(),
legend.position = "bottom",
legend.direction = "horizontal",
legend.key.size = unit(0.2, "cm"),
plot.margin = unit(c(10, 5, 5, 5), "mm"),
strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
strip.text = element_text(face = "bold")
)
}
theme_publication_legend_right <- function(base_size = BASE_SIZE, base_family = "sans") {
theme_publication(base_size, base_family) +
theme(
legend.position = "right",
legend.direction = "vertical",
legend.key.size = unit(0.5, "cm")
)
}
scale_fill_publication <- function(...) {
discrete_scale("fill", "Publication", manual_pal(values = c(
"#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
"#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
)), ...)
}
scale_colour_publication <- function(...) {
discrete_scale("colour", "Publication", manual_pal(values = c(
"#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
"#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
)), ...)
}
# Read and merge data
load_and_merge_data <- function(file1, file2) {
df1 <- read.csv(file = file1, stringsAsFactors = FALSE, header = TRUE)
df2 <- read.csv(file = file2, stringsAsFactors = FALSE, header = TRUE)
merge(df1, df2, by = "Term_Avg", all = TRUE, suffixes = c("_df1", "_df2"))
}
# Generate a plot and save to PDF and HTML
generate_plot <- function(data, x_var, y_var, color_var, title, file_path, theme_function) {
gg <- ggplot(data = data, aes_string(
x = x_var,
y = y_var,
color = color_var
)) +
xlab(paste("GO Term Avg lm Z for", args$exp1_name)) +
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
geom_point(shape = 3) +
ylab(paste("GO Term Avg lm Z for", args$exp2_name)) +
ggtitle(title) +
theme_function()
pdf(file.path(file_path, ".pdf"), width = PLOT_WIDTH, height = PLOT_HEIGHT)
print(gg)
dev.off()
pgg <- ggplotly(gg)
htmlwidgets::saveWidget(pgg, file.path(file_path, ".html"))
}
# Identify and annotate specific interactions
annotate_interactions <- function(df, exp1_name, exp2_name, suffix) {
df$Overlap_Avg <- NA
interactions <- list(
"df1_Specific_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] < 2),
"df1_Specific_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] > -2),
"df2_Specific_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] < 2),
"df2_Specific_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] > -2),
"Overlap_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] >= 2),
"Overlap_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] <= -2),
"df2_Aggravators_df1_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] <= -2),
"df2_Alleviators_df1_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] >= 2)
)
annotation <- list(
df1_Specific_Aggravators = paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"),
df1_Specific_Alleviators = paste(exp1_name, "Specific_Deletion_Suppressors", sep = "_"),
df2_Specific_Aggravators = paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"),
df2_Specific_Alleviators = paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"),
Overlap_Aggravators = "Overlapping_Deletion_Enhancers",
Overlap_Alleviators = "Overlapping_Deletion_Suppressors",
df2_Aggravators_df1_Alleviators = paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"),
df2_Alleviators_df1_Aggravators = paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_")
)
for (key in names(interactions)) {
try(df$Overlap_Avg[interactions[[key]]] <- annotation[[key]])
}
df
}
# Rank and filter data
rank_and_filter_data <- function(df, suffix) {
z1 <- df
z1[[paste0("L_Subtract_SD_", suffix, "_df1")]] <- z1[[paste0("Z_lm_", suffix, "_Avg_df1")]] - z1[[paste0("Z_lm_", suffix, "_SD_df1")]]
z1[[paste0("L_Subtract_SD_", suffix, "_df2")]] <- z1[[paste0("Z_lm_", suffix, "_Avg_df2")]] - z1[[paste0("Z_lm_", suffix, "_SD_df2")]]
z2 <- df
z2[[paste0("L_Subtract_SD_", suffix, "_df1")]] <- z2[[paste0("Z_lm_", suffix, "_Avg_df1")]] + z2[[paste0("Z_lm_", suffix, "_SD_df1")]]
z2[[paste0("L_Subtract_SD_", suffix, "_df2")]] <- z2[[paste0("Z_lm_", suffix, "_Avg_df2")]] + z2[[paste0("Z_lm_", suffix, "_SD_df2")]]
df_above_threshold <- df[!is.na(df$Overlap_Avg), ]
df_above_threshold$df1_Rank <- rank(-df_above_threshold[[paste0("Z_lm_", suffix, "_Avg_df1")]], ties.method = "random")
df_above_threshold$df2_Rank <- rank(-df_above_threshold[[paste0("Z_lm_", suffix, "_Avg_df2")]], ties.method = "random")
list(z1 = z1, z2 = z2, df_above_threshold = df_above_threshold)
}
# Main execution function for a pairwise comparison
run_pairwise_comparison <- function(suffix, dir) {
df <- load_and_merge_data(args$exp1_file, args$exp2_file)
# Generate initial ontology-based plot
generate_plot(df,
paste0("Z_lm_", suffix, "_Avg_df1"), paste0("Z_lm_", suffix, "_Avg_df2"), "Ontology_Avg_df1",
paste("Comparing Average GO Term Z lm for", args$exp1_name, "vs.", args$exp2_name),
file.path(dir, paste0("Scatter_lm_GTA_Averages_", args$exp1_name, "_vs_", args$exp2_name, "_All_ByOntology")),
theme_publication_legend_right)
# Annotate interactions and generate overlap-based plot
df <- annotate_interactions(df, args$exp1_name, args$exp2_name, suffix)
ranks <- rank_and_filter_data(df, suffix)
generate_plot(df,
paste0("Z_lm_", suffix, "_Avg_df1"),
paste0("Z_lm_", suffix, "_Avg_df2"),
"Overlap_Avg",
paste("Comparing Average GO Term Z lm for", args$exp1_name, "vs.", args$exp2_name),
file.path(dir, paste0("Scatter_lm_GTA_Averages_", args$exp1_name, "_vs_", args$exp2_name, "_All_ByOverlap")),
theme_publication_legend_right)
generate_plot(ranks$df_above_threshold,
paste0("Z_lm_", suffix, "_Avg_df1"),
paste0("Z_lm_", suffix, "_Avg_df2"),
"Overlap_Avg",
paste("Comparing Average GO Term Z lm for", args$exp1_name, "vs.", args$exp2_name, "Above Threshold"),
file.path(dir,
paste0("Scatter_lm_GTA_Averages_", args$exp1_name, "_vs_", args$exp2_name, "_All_ByOverlap_AboveThreshold")),
theme_publication_legend_right)
# Save CSV files
write.csv(df, file.path(dir,
paste0("All_GTA_Avg_Scores_", args$exp1_name, "_vs_", args$exp2_name, ".csv")),
row.names = FALSE)
write.csv(ranks$df_above_threshold,
file.path(dir, paste0("AboveThreshold_GTA_Avg_Scores_", args$exp1_name, "_vs_", args$exp2_name, ".csv")),
row.names = FALSE)
}
# Execute Pairwise L and Pairwise K comparisons
run_pairwise_comparison("L", pairDirL)
run_pairwise_comparison("K", pairDirK)

View File

@@ -14,7 +14,7 @@ BASE_SIZE <- 14
options(warn = 2, max.print = 100)
# Function to parse arguments
# Parse arguments
parse_arguments <- function() {
if (interactive()) {
args <- c(
@@ -92,7 +92,7 @@ scale_colour_publication <- function(...) {
)), ...)
}
# Function to load and preprocess data
# Load and preprocess data
load_and_preprocess_data <- function(input_file) {
df <- tryCatch({
read.delim(input_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE)
@@ -124,7 +124,7 @@ df <- df %>%
mutate(OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep)) %>%
filter(!Gene %in% c("BLANK", "Blank", "blank"), Drug != "BMH21")
# Function to create plot
# Create plot
create_plot <- function(df, var, plot_type) {
filtered_df <- df %>% filter(is.finite(.data[[var]]))
p <- ggplot(filtered_df, aes(Scan, .data[[var]], color = as.factor(conc_num))) +
@@ -143,7 +143,7 @@ create_plot <- function(df, var, plot_type) {
return(p)
}
# Function to publish plot to PDF and HTML (Plotly)
# Publish plot to PDF and HTML (Plotly)
publish_plot <- function(plot, plot_path) {
# if (file.exists(plot_path)) {
# file.rename(plot_path, paste0(plot_path, BACKUP_SUFFIX))
@@ -159,7 +159,7 @@ publish_plot <- function(plot, plot_path) {
saveWidget(pgg, sub(".pdf$", ".html", plot_path), selfcontained = TRUE)
}
# Function to publish multiple plots
# Publish multiple plots
publish_multiple_plots <- function(df, variables, plot_type, out_dir_qc, suffix = "") {
lapply(variables, function(var) {
plot <- create_plot(df, var, plot_type)
@@ -167,7 +167,7 @@ publish_multiple_plots <- function(df, variables, plot_type, out_dir_qc, suffix
})
}
# Function to calculate and publish summary statistics
# Calculate and publish summary statistics
publish_summary_stats <- function(df, variables, out_dir) {
stats_list <- lapply(variables, function(var) {
df %>%
@@ -183,7 +183,7 @@ publish_summary_stats <- function(df, variables, out_dir) {
write.csv(summary_stats_df, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
}
# Function to calculate and publish interaction scores
# Calculate and publish interaction scores
publish_interaction_scores <- function(df, out_dir) {
interaction_scores <- df %>%
group_by(OrfRep) %>%
@@ -207,7 +207,7 @@ publish_interaction_scores <- function(df, out_dir) {
arrange(l_rank, k_rank), file.path(out_dir, "rf_zscores_interaction_ranked.csv"), row.names = FALSE)
}
# Function to publish z-scores
# Publish z-scores
publish_zscores <- function(df, out_dir) {
zscores <- df %>%
mutate(

View File

@@ -1,238 +1,375 @@
#!/usr/bin/env Rscript
# JoinInteractExps.R
library("plyr")
library("sos")
library("dplyr")
args <- commandArgs(TRUE)
# Set output dir
if (length(args) >= 1) {
out_dir <- file.path(args[1])
} else {
out_dir <- "./" # for legacy workflow
}
# Set sd value
if (length(args) >= 2) {
sd <- as.numeric(args[2])
} else {
sd <- 2 # default value
}
sprintf("SD value is: %d", sd)
# Set study_info file
if (length(args) >= 3) {
study_info <- file.path(args[3])
} else {
study_info <- "../Code/StudyInfo.csv" # for legacy workflow
}
studies <- args[4:length(args)]
print(studies)
input_files <- c()
for (i in seq_along(studies)) {
study <- studies[i]
zs_file <- file.path(study, "zscores", "zscores_interaction.csv")
if (file.exists(zs_file)) {
input_files[i] <- zs_file
# Function to parse and set arguments
parse_arguments <- function() {
if (interactive()) {
args <- c(
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD",
2,
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/StudyInfo.csv",
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/Exp1",
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/Exp2"
)
} else {
args <- commandArgs(trailingOnly = TRUE)
}
}
rm(zs_file, study)
# for (var in ls()) {
# print(paste(var, ":", get(var)))
list(
out_dir = normalizePath(file.path(args[1]), mustWork = FALSE),
sd = as.numeric(args[2]),
study_info = normalizePath(file.path(args[3]), mustWork = FALSE),
input_dirs = args[4:length(args)]
)
}
args <- parse_arguments()
# Create an array for the zscores files
get_zscores_files <- function(dirs) {
files <- sapply(dirs, function(study) {
file_path <- file.path(study, "zscores", "zscores_interaction.csv")
if (file.exists(file_path)) file_path else NULL
}, simplify = TRUE, USE.NAMES = FALSE)
# Filter out NULL entries
files[!sapply(files, is.null)]
}
zscores_files <- get_zscores_files(args$input_dirs)
sprintf("The SD value is: %d", args$sd)
# Ensure there are enough files to compare
if (length(zscores_files) < 2) stop("Not enough experiments to compare, exiting script")
# Function to join zscores files
join_zscores_files <- function(files) {
joined_data <- read.csv(file = files[1], stringsAsFactors = FALSE)
for (file in files[-1]) {
temp_data <- read.csv(file = file, stringsAsFactors = FALSE)
joined_data <- join(joined_data, temp_data, by = "OrfRep")
}
joined_data
}
# Load and join zscores files
joined_data <- join_zscores_files(zscores_files)
# Order and select columns
order_and_select_columns <- function(data) {
ordered_data <- data[, order(colnames(data))]
selected_headers <- select(ordered_data,
contains("OrfRep"), matches("Gene"),
contains("z_lm_k"), contains("z_shift_k"),
contains("z_lm_l"), contains("z_shift_l"))
selected_headers
}
selected_headers <- order_and_select_columns(joined_data)
# Remove redundant columns like "Gene.1"
clean_headers <- function(data, suffixes) {
suffixes_to_remove <- paste0("Gene.", seq_len(suffixes))
select(data, -all_of(suffixes_to_remove))
}
headSel <- clean_headers(selected_headers, length(zscores_files) - 1)
headSel2 <- clean_headers(select(joined_data, contains("OrfRep"), matches("Gene")), length(zscores_files) - 1)
# Fill NA values in Shift and Z_lm columns
fill_na_in_columns <- function(data) {
for (header in colnames(data)) {
if (grepl("Shift", header)) {
data[[header]][is.na(data[[header]])] <- 0.001
} else if (grepl("Z_lm_", header)) {
data[[header]][is.na(data[[header]])] <- 0.0001
}
}
data
}
headSel <- fill_na_in_columns(headSel)
# Filter based on standard deviation
filter_by_sd <- function(data, sd) {
if (sd == 0) return(data)
z_lm_cols <- select(data, contains("z_lm_"))
filter_vector <- rowSums(abs(z_lm_cols) >= sd) > 0
data[filter_vector, ]
}
REMcRdy <- filter_by_sd(select(headSel, contains("OrfRep"), matches("Gene"), contains("z_lm_")), args$sd)
shiftOnly <- filter_by_sd(select(headSel, contains("OrfRep"), matches("Gene"), contains("z_shift")), args$sd)
# Reorder columns to interleave Z_lm and Shift data
reorder_columns <- function(data1, data2) {
combined_data <- data1
for (i in 3:ncol(data1)) {
combined_data <- cbind(combined_data, data2[i], data1[i])
}
combined_data
}
combI <- reorder_columns(headSel2, shiftOnly)
# Write output files
write.csv(REMcRdy, file.path(args$out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
write.csv(shiftOnly, file.path(args$out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
# Relabel headers using experiment names from StudyInfo.csv
relabel_headers <- function(headers, labels) {
new_labels <- headers
for (i in seq_along(headers)) {
suffix <- sub("^.*\\.(\\d+)$", "\\1", headers[i])
if (suffix %in% 1:3) {
exp_name <- labels[as.numeric(suffix), 2]
new_labels[i] <- gsub(paste0(".", suffix), paste0("_", exp_name), headers[i])
}
}
new_labels
}
LabelStd <- read.csv(file = args$study_info, stringsAsFactors = FALSE)
colnames(shiftOnly) <- relabel_headers(colnames(shiftOnly), LabelStd)
colnames(REMcRdy) <- relabel_headers(colnames(REMcRdy), LabelStd)
# Save relabeled files
write.csv(REMcRdy, file.path(args$out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
write.csv(shiftOnly, file.path(args$out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
# Save updated parameters
LabelStd[, 4] <- args$sd
write.csv(LabelStd, file.path(args$out_dir, "parameters.csv"), row.names = FALSE)
write.csv(LabelStd, file = args$study_info, row.names = FALSE)
# library("plyr")
# library("sos")
# library("dplyr")
# # Function to parse and set arguments
# parse_arguments <- function() {
# if (interactive()) {
# args <- c(
# "./", # Default out_dir
# "2", # Default SD value
# "../Code/StudyInfo.csv" # Default study_info path
# )
# } else {
# args <- commandArgs(trailingOnly = TRUE)
# }
# print(input_files)
# print(length(input_files))
# list(
# out_dir = normalizePath(file.path(args[1]), mustWork = FALSE),
# sd = as.numeric(args[2]),
# study_info = normalizePath(file.path(args[3]), mustWork = FALSE),
# input_dirs = args[4:length(args)]
# )
# }
# TODO this is better handled in a loop in case you want to compare more experiments?
# The input is already designed for this
# Read in the files for your experiment and
# Join the two files at a time as a function of how many inputFile
# list the larger file first ? in this example X2 has the larger number of genes
# If X1 has a larger number of genes, switch the order of X1 and X2
if (length(input_files) < 2) {
print("Note enough Exps to compare, skipping join")
stop("Exiting script")
}
# args <- parse_arguments()
if (length(input_files) >= 2) {
X1 <- read.csv(file = input_files[1], stringsAsFactors = FALSE)
X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE)
X <- join(X1, X2, by = "OrfRep")
OBH <- X[, order(colnames(X))] # OrderByHeader
headers <- select(OBH, contains("OrfRep"), matches("Gene"),
contains("z_lm_k"), contains("z_shift_k"), contains("z_lm_l"), contains("z_shift_l"))
headSel <- select(headers, -"Gene.1") # remove "Gene.1 column
headSel2 <- select(OBH, contains("OrfRep"), matches("Gene")) # frame for interleaving Z_lm with Shift colums
headSel2 <- select(headSel2, -"Gene.1") # remove "Gene.1 column # frame for interleaving Z_lm with Shift colums
}
# # Create an array for the zscores files
# zscores_files <- sapply(args$input_dirs, function(study) {
# file_path <- file.path(study, "zscores", "zscores_interaction.csv")
# if (file.exists(file_path)) file_path else NULL
# }, simplify = TRUE, USE.NAMES = FALSE)
if (length(input_files) >= 3) {
X3 <- read.csv(file = input_files[3], stringsAsFactors = FALSE)
X <- join(X, X3, by = "OrfRep")
headSel <- select(headers, -"Gene.1", -"Gene.2")
headSel2 <- select(OBH, contains("OrfRep"), matches("Gene"))
headSel2 <- select(headSel2, -"Gene.1", -"Gene.2")
}
# # Filter out NULL entries
# zscores_files <- zscores_files[!sapply(zscores_files, is.null)]
if (length(input_files) >= 4) {
X4 <- read.csv(file = input_files[4], stringsAsFactors = FALSE)
X <- join(X, X4, by = "OrfRep")
headSel <- select(headers, -"Gene.1", -"Gene.2", -"Gene.3")
headSel2 <- select(OBH, contains("OrfRep"), matches("Gene"))
headSel2 <- select(headSel2, -"Gene.1", -"Gene.2", -"Gene.3")
}
# sprintf("The SD value is: %d", args$sd)
# # print(args$input_dirs)
# print(headers)
# headSel$contains("Z_Shift") %>% replace_na(0.001)
headers <- colnames(headSel)
# print(headers)
i <- 0
for (i in 1:length(headers)) {
if (grepl("Shift", headers[i])) {
headSel[headers[i]][is.na(headSel[headers[i]])] <- 0.001
}
if (grepl("Z_lm_", headers[i])) {
headSel[headers[i]][is.na(headSel[headers[i]])] <- 0.0001
}
}
# # TODO this is better handled in a loop in case you want to compare more experiments?
# # The input is already designed for this
# # Read in the files for your experiment and
# # Join the two files at a time as a function of how many inputFile
# # list the larger file first ? in this example X2 has the larger number of genes
# # If X1 has a larger number of genes, switch the order of X1 and X2
# if (length(zscores_files) < 2) {
# print("Note enough Exps to compare, skipping join")
# stop("Exiting script")
# }
# 2SD option code to exclude Z_lm values less than 2 standard Deviations
REMcRdy <- select(headSel, contains("OrfRep"), matches("Gene"), contains("z_lm_"))
shiftOnly <- select(headSel, contains("OrfRep"), matches("Gene"), contains("z_shift"))
# if (length(zscores_files) >= 2) {
# X1 <- read.csv(file = zscores_files[1], stringsAsFactors = FALSE)
# X2 <- read.csv(file = zscores_files[2], stringsAsFactors = FALSE)
# X <- join(X1, X2, by = "OrfRep")
# OBH <- X[, order(colnames(X))] # OrderByHeader
# headers <- select(OBH, contains("OrfRep"), matches("Gene"),
# contains("z_lm_k"), contains("z_shift_k"), contains("z_lm_l"), contains("z_shift_l"))
# headSel <- select(headers, -"Gene.1") # remove "Gene.1 column
# headSel2 <- select(OBH, contains("OrfRep"), matches("Gene")) # frame for interleaving Z_lm with Shift colums
# headSel2 <- select(headSel2, -"Gene.1") # remove "Gene.1 column # frame for interleaving Z_lm with Shift colums
# }
# Code to replace the numeric (.1 .2 .3) headers with experiment names from StudyInfo.txt
Labels <- read.csv(file = study_info, stringsAsFactors = FALSE, sep = ",")
# if (length(zscores_files) >= 3) {
# X3 <- read.csv(file = zscores_files[3], stringsAsFactors = FALSE)
# X <- join(X, X3, by = "OrfRep")
# headSel <- select(headers, -"Gene.1", -"Gene.2")
# headSel2 <- select(OBH, contains("OrfRep"), matches("Gene"))
# headSel2 <- select(headSel2, -"Gene.1", -"Gene.2")
# }
# Using Text search grepl to relabel headers
REMcRdyHdr <- colnames(REMcRdy)
REMcRdyLabels <- "asdf"
shftHdr <- colnames(shiftOnly)
shiftLabels <- "asdf"
shiftLabels[1:2] <- shftHdr[1:2]
REMcRdyLabels[1:2] <- REMcRdyHdr[1:2]
# if (length(zscores_files) >= 4) {
# X4 <- read.csv(file = zscores_files[4], stringsAsFactors = FALSE)
# X <- join(X, X4, by = "OrfRep")
# headSel <- select(headers, -"Gene.1", -"Gene.2", -"Gene.3")
# headSel2 <- select(OBH, contains("OrfRep"), matches("Gene"))
# headSel2 <- select(headSel2, -"Gene.1", -"Gene.2", -"Gene.3")
# }
for (i in 3:(length(shftHdr))) {
if (i == 3) {
shiftLabels[3] <- paste0(Labels[1, 2], ".", shftHdr[3])
REMcRdyLabels[3] <- paste0(Labels[1, 2], ".", REMcRdyHdr[3])
}
if (i == 5) {
shiftLabels[5] <- paste0(Labels[1, 2], ".", shftHdr[5])
REMcRdyLabels[5] <- paste0(Labels[1, 2], ".", REMcRdyHdr[5])
}
if (i == 7) {
shiftLabels[7] <- paste0(Labels[1, 2], ".", shftHdr[7])
REMcRdyLabels[7] <- paste0(Labels[1, 2], ".", REMcRdyHdr[7])
}
if (grepl(".1", shftHdr[i], fixed = true)) {
shiftLabels[i] <- paste0(Labels[2, 2], ".", shftHdr[i])
REMcRdyLabels[i] <- paste0(Labels[2, 2], ".", REMcRdyHdr[i])
}
if (grepl(".2", shftHdr[i], fixed = true)) {
shiftLabels[i] < -paste0(Labels[3, 2], ".", shftHdr[i])
REMcRdyLabels[i] <- paste0(Labels[3, 2], ".", REMcRdyHdr[i])
}
if (grepl(".3", shftHdr[i], fixed = true)) {
shiftLabels[i] <- paste0(Labels[4, 2], ".", shftHdr[i])
REMcRdyLabels[i] <- paste0(Labels[4, 2], ".", REMcRdyHdr[i])
}
}
# # print(headers)
# # headSel$contains("Z_Shift") %>% replace_na(0.001)
# headers <- colnames(headSel)
# # print(headers)
# i <- 0
# for (i in 1:length(headers)) {
# if (grepl("Shift", headers[i])) {
# headSel[headers[i]][is.na(headSel[headers[i]])] <- 0.001
# }
# if (grepl("Z_lm_", headers[i])) {
# headSel[headers[i]][is.na(headSel[headers[i]])] <- 0.0001
# }
# }
for (i in 3:(length(REMcRdyLabels))) {
j <- as.integer(i)
REMcRdyLabels[j] <- gsub("[.]", "_", REMcRdyLabels[j])
shiftLabels[j] <- gsub("[.]", "_", shiftLabels[j])
}
# # 2SD option code to exclude Z_lm values less than 2 standard Deviations
# REMcRdy <- select(headSel, contains("OrfRep"), matches("Gene"), contains("z_lm_"))
# shiftOnly <- select(headSel, contains("OrfRep"), matches("Gene"), contains("z_shift"))
colnames(shiftOnly) <- shiftLabels
colnames(REMcRdy) <- REMcRdyLabels
# # Code to replace the numeric (.1 .2 .3) headers with experiment names from StudyInfo.txt
# Labels <- read.csv(file = study_info, stringsAsFactors = FALSE, sep = ",")
combI <- headSel2 # starting Template orf, Genename columns
# # Using Text search grepl to relabel headers
# REMcRdyHdr <- colnames(REMcRdy)
# REMcRdyLabels <- "asdf"
# shftHdr <- colnames(shiftOnly)
# shiftLabels <- "asdf"
# shiftLabels[1:2] <- shftHdr[1:2]
# REMcRdyLabels[1:2] <- REMcRdyHdr[1:2]
# headersRemc<-colnames(REMcRdy)
# Reorder columns to produce an interleaved set of Z_lm and Shift data for all the cpps.
for (i in 3:length(colnames(REMcRdy))) {
combI <- cbind.data.frame(combI, shiftOnly[i])
combI <- cbind.data.frame(combI, REMcRdy[i])
}
# for (i in 3:(length(shftHdr))) {
# if (i == 3) {
# shiftLabels[3] <- paste0(Labels[1, 2], ".", shftHdr[3])
# REMcRdyLabels[3] <- paste0(Labels[1, 2], ".", REMcRdyHdr[3])
# }
# if (i == 5) {
# shiftLabels[5] <- paste0(Labels[1, 2], ".", shftHdr[5])
# REMcRdyLabels[5] <- paste0(Labels[1, 2], ".", REMcRdyHdr[5])
# }
# if (i == 7) {
# shiftLabels[7] <- paste0(Labels[1, 2], ".", shftHdr[7])
# REMcRdyLabels[7] <- paste0(Labels[1, 2], ".", REMcRdyHdr[7])
# }
# if (grepl(".1", shftHdr[i], fixed = true)) {
# shiftLabels[i] <- paste0(Labels[2, 2], ".", shftHdr[i])
# REMcRdyLabels[i] <- paste0(Labels[2, 2], ".", REMcRdyHdr[i])
# }
# if (grepl(".2", shftHdr[i], fixed = true)) {
# shiftLabels[i] < -paste0(Labels[3, 2], ".", shftHdr[i])
# REMcRdyLabels[i] <- paste0(Labels[3, 2], ".", REMcRdyHdr[i])
# }
# if (grepl(".3", shftHdr[i], fixed = true)) {
# shiftLabels[i] <- paste0(Labels[4, 2], ".", shftHdr[i])
# REMcRdyLabels[i] <- paste0(Labels[4, 2], ".", REMcRdyHdr[i])
# }
# }
Vec1 <- NA
Vec2 <- NA
Vec3 <- NA
Vec4 <- NA
Vec5 <- NA
Vec6 <- NA
Vec7 <- NA
Vec8 <- NA
# for (i in 3:(length(REMcRdyLabels))) {
# j <- as.integer(i)
# REMcRdyLabels[j] <- gsub("[.]", "_", REMcRdyLabels[j])
# shiftLabels[j] <- gsub("[.]", "_", shiftLabels[j])
# }
if (length(REMcRdy) == 6) {
Vec1 <- abs(REMcRdy[, 3]) >= sd
Vec2 <- abs(REMcRdy[, 4]) >= sd
Vec3 <- abs(REMcRdy[, 5]) >= sd
Vec4 <- abs(REMcRdy[, 6]) >= sd
bolVec <- Vec1 | Vec2 | Vec3 | Vec4
REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
REMcRdyGT2[, 3:6] <- REMcRdy[bolVec, 3:6]
shiftOnlyGT2 <- shiftOnly[bolVec, 1:2]
shiftOnlyGT2[, 3:6] <- shiftOnly[bolVec, 3:6]
}
# colnames(shiftOnly) <- shiftLabels
# colnames(REMcRdy) <- REMcRdyLabels
if (length(REMcRdy) == 8) {
Vec1 <- abs(REMcRdy[, 3]) >= sd
Vec2 <- abs(REMcRdy[, 4]) >= sd
Vec3 <- abs(REMcRdy[, 5]) >= sd
Vec4 <- abs(REMcRdy[, 6]) >= sd
Vec5 <- abs(REMcRdy[, 7]) >= sd
Vec6 <- abs(REMcRdy[, 8]) >= sd
bolVec <- Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6
REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
REMcRdyGT2[, 3:8] <- REMcRdy[bolVec, 3:8]
shiftOnlyGT2 <- shiftOnly[bolVec, 1:2]
shiftOnlyGT2[, 3:8] <- shiftOnly[bolVec, 3:8]
}
# combI <- headSel2 # starting Template orf, Genename columns
if (length(REMcRdy) == 10) {
Vec1 <- abs(REMcRdy[, 3]) >= sd
Vec2 <- abs(REMcRdy[, 4]) >= sd
Vec3 <- abs(REMcRdy[, 5]) >= sd
Vec4 <- abs(REMcRdy[, 6]) >= sd
Vec5 <- abs(REMcRdy[, 7]) >= sd
Vec6 <- abs(REMcRdy[, 8]) >= sd
Vec7 <- abs(REMcRdy[, 9]) >= sd
Vec8 <- abs(REMcRdy[, 10]) >= sd
bolVec <- Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6 | Vec7 | Vec8
REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
REMcRdyGT2[, 3:10] <- REMcRdy[bolVec, 3:10]
shiftOnlyGT2 <- shiftOnly[bolVec, 1:2]
shiftOnlyGT2[, 3:10] <- shiftOnly[bolVec, 3:10]
}
# # headersRemc<-colnames(REMcRdy)
# # Reorder columns to produce an interleaved set of Z_lm and Shift data for all the cpps.
# for (i in 3:length(colnames(REMcRdy))) {
# combI <- cbind.data.frame(combI, shiftOnly[i])
# combI <- cbind.data.frame(combI, REMcRdy[i])
# }
if (sd != 0) {
REMcRdy <- REMcRdyGT2 # [,2:length(REMcRdyGT2)]
shiftOnly <- shiftOnlyGT2 # [,2:length(shiftOnlyGT2)]
}
# Vec1 <- NA
# Vec2 <- NA
# Vec3 <- NA
# Vec4 <- NA
# Vec5 <- NA
# Vec6 <- NA
# Vec7 <- NA
# Vec8 <- NA
if (sd == 0) {
REMcRdy <- REMcRdy # [,2:length(REMcRdy)]
shiftOnly <- shiftOnly # [,2:length(shiftOnly)]
}
# if (length(REMcRdy) == 6) {
# Vec1 <- abs(REMcRdy[, 3]) >= sd
# Vec2 <- abs(REMcRdy[, 4]) >= sd
# Vec3 <- abs(REMcRdy[, 5]) >= sd
# Vec4 <- abs(REMcRdy[, 6]) >= sd
# bolVec <- Vec1 | Vec2 | Vec3 | Vec4
# REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
# REMcRdyGT2[, 3:6] <- REMcRdy[bolVec, 3:6]
# shiftOnlyGT2 <- shiftOnly[bolVec, 1:2]
# shiftOnlyGT2[, 3:6] <- shiftOnly[bolVec, 3:6]
# }
# R places hidden "" around the header names. The following
# is intended to remove those quote so that the "" do not blow up the Java REMc.
# Use ,quote=F in the write.csv statement to fix R output file.
# write.csv(combI,file.path(out_dir,"CombinedKLzscores.csv"), row.names = FALSE)
write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
# LabelStd <- read.table(file="./parameters.csv",stringsAsFactors = FALSE,sep = ",")
# if (length(REMcRdy) == 8) {
# Vec1 <- abs(REMcRdy[, 3]) >= sd
# Vec2 <- abs(REMcRdy[, 4]) >= sd
# Vec3 <- abs(REMcRdy[, 5]) >= sd
# Vec4 <- abs(REMcRdy[, 6]) >= sd
# Vec5 <- abs(REMcRdy[, 7]) >= sd
# Vec6 <- abs(REMcRdy[, 8]) >= sd
# bolVec <- Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6
# REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
# REMcRdyGT2[, 3:8] <- REMcRdy[bolVec, 3:8]
# shiftOnlyGT2 <- shiftOnly[bolVec, 1:2]
# shiftOnlyGT2[, 3:8] <- shiftOnly[bolVec, 3:8]
# }
LabelStd <- read.csv(file = study_info, stringsAsFactors = FALSE)
# print(sd)
LabelStd[, 4] <- as.numeric(sd)
write.csv(LabelStd, file = file.path(out_dir, "parameters.csv"), row.names = FALSE)
write.csv(LabelStd, file = study_info, row.names = FALSE)
# if (length(REMcRdy) == 10) {
# Vec1 <- abs(REMcRdy[, 3]) >= sd
# Vec2 <- abs(REMcRdy[, 4]) >= sd
# Vec3 <- abs(REMcRdy[, 5]) >= sd
# Vec4 <- abs(REMcRdy[, 6]) >= sd
# Vec5 <- abs(REMcRdy[, 7]) >= sd
# Vec6 <- abs(REMcRdy[, 8]) >= sd
# Vec7 <- abs(REMcRdy[, 9]) >= sd
# Vec8 <- abs(REMcRdy[, 10]) >= sd
# bolVec <- Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6 | Vec7 | Vec8
# REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
# REMcRdyGT2[, 3:10] <- REMcRdy[bolVec, 3:10]
# shiftOnlyGT2 <- shiftOnly[bolVec, 1:2]
# shiftOnlyGT2[, 3:10] <- shiftOnly[bolVec, 3:10]
# }
# if (sd != 0) {
# REMcRdy <- REMcRdyGT2 # [,2:length(REMcRdyGT2)]
# shiftOnly <- shiftOnlyGT2 # [,2:length(shiftOnlyGT2)]
# }
# if (sd == 0) {
# REMcRdy <- REMcRdy # [,2:length(REMcRdy)]
# shiftOnly <- shiftOnly # [,2:length(shiftOnly)]
# }
# # R places hidden "" around the header names. The following
# # is intended to remove those quote so that the "" do not blow up the Java REMc.
# # Use ,quote=F in the write.csv statement to fix R output file.
# # write.csv(combI,file.path(out_dir,"CombinedKLzscores.csv"), row.names = FALSE)
# write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
# write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
# # LabelStd <- read.table(file="./parameters.csv",stringsAsFactors = FALSE,sep = ",")
# LabelStd <- read.csv(file = study_info, stringsAsFactors = FALSE)
# # print(sd)
# LabelStd[, 4] <- as.numeric(sd)
# write.csv(LabelStd, file = file.path(out_dir, "parameters.csv"), row.names = FALSE)
# write.csv(LabelStd, file = study_info, row.names = FALSE)

File diff suppressed because it is too large Load Diff