Add new configuration file
This commit is contained in:
121
workflow/.old/apps/shell/submodules/parse_study_info
Normal file
121
workflow/.old/apps/shell/submodules/parse_study_info
Normal file
@@ -0,0 +1,121 @@
|
||||
#!/usr/bin/env/bash
|
||||
# @description Creates, modifies, and parses the study info file
|
||||
#
|
||||
# TODO
|
||||
#
|
||||
# * Needs refactoring
|
||||
# * Ended up combining a few functions into one
|
||||
#
|
||||
#
|
||||
# @exitcode 0 If one or more studies found
|
||||
# @exitcode 1 If no studies found
|
||||
# @set STUDIES array contains array of "exp# sd ExpDir"
|
||||
parse_study_info() {
|
||||
debug "Running: ${FUNCNAME[0]}"
|
||||
|
||||
# Only run this once per project
|
||||
# in case we run multiple modules
|
||||
(( SET_STUDIES )) && return 0
|
||||
declare -g SET_STUDIES=1
|
||||
|
||||
# Use initials from project or whoami?
|
||||
# Best I can do is first two letters of username
|
||||
# See TODO in markdown
|
||||
initials="${USER:0:2}"
|
||||
INITIALS=${initials^^}
|
||||
|
||||
empty_study=1
|
||||
# Find an Exp directory that does not exist
|
||||
while [[ -d $STUDY_RESULTS_DIR/exp$empty_study ]]; do
|
||||
(( empty_study++ ))
|
||||
done
|
||||
|
||||
next_study_entry="$empty_study,$PROJECT_NAME,NA,NA,$INITIALS"
|
||||
|
||||
echo "${underline}Study Info File${nounderline}"
|
||||
|
||||
if [[ -f $STUDY_INFO_FILE ]]; then
|
||||
# Get latest entry
|
||||
while IFS=',' read -r col1 _; do # split on comma, get Exp # from 1st column
|
||||
studies_nums+=("$col1")
|
||||
done < <(tail -n +2 "$STUDY_INFO_FILE")
|
||||
largest=${studies_nums[0]}
|
||||
for i in "${studies_nums[@]}"; do
|
||||
if ((i > largest)); then
|
||||
largest=$i
|
||||
fi
|
||||
done
|
||||
empty_study=$((largest+1))
|
||||
next_study_entry="$((empty_study)),$PROJECT_NAME,NA,NA,$INITIALS"
|
||||
else # create a default study info file
|
||||
echo "ExpNumb,ExpLabel,BackgroundSD,ZscoreJoinSD,AnalysisBy" > "$STUDY_INFO_FILE"
|
||||
echo "$next_study_entry" >> "$STUDY_INFO_FILE"
|
||||
next_study_entry="$((empty_study+1)),$PROJECT_NAME,NA,NA,$INITIALS"
|
||||
fi
|
||||
|
||||
# Print current studies
|
||||
cat <<-EOF
|
||||
* Give each experiment labels to be used for the plots and specific files.
|
||||
* Enter the desired experiment names in the order they should appear in the REMc heatmaps
|
||||
|
||||
Current study info file contents:
|
||||
|
||||
${underline}$STUDY_INFO_FILE${nounderline}
|
||||
$(cat "$STUDY_INFO_FILE")
|
||||
|
||||
EOF
|
||||
|
||||
# Allow user to add/edit the study info file
|
||||
if ! ((YES)); then
|
||||
for ((i=1; i<2; i++)); do
|
||||
cat <<-EOF
|
||||
Next entry suggestion: "$next_study_entry"
|
||||
|
||||
Would you like to:
|
||||
* (a)dd the suggested entry
|
||||
* (e)dit the study info file manually
|
||||
* (c)ontinue (default)
|
||||
EOF
|
||||
read -r -p "(c): " response
|
||||
echo ""
|
||||
[[ -z $response ]] && break
|
||||
case $response in
|
||||
a)
|
||||
echo "Adding auto-entry suggestion to $STUDY_INFO_FILE"
|
||||
echo "$next_study_entry" >> "$STUDY_INFO_FILE"
|
||||
next_study_entry="$((empty_study+1)),$PROJECT_NAME,NA,NA,$INITIALS"
|
||||
i=0
|
||||
;;
|
||||
e)
|
||||
debug "$EDITOR $STUDY_INFO_FILE"
|
||||
"$EDITOR" "$STUDY_INFO_FILE"
|
||||
;;
|
||||
c)
|
||||
break
|
||||
;;
|
||||
*)
|
||||
err "Invalid response, please try again"
|
||||
i=0
|
||||
;;
|
||||
esac
|
||||
break
|
||||
done
|
||||
fi
|
||||
|
||||
# Read study info file
|
||||
declare -ga STUDIES
|
||||
while IFS=',' read -r num _ sd _; do
|
||||
STUDIES+=("$num $sd $STUDY_RESULTS_DIR/exp$num")
|
||||
done < <(tail -n +2 "$STUDY_INFO_FILE") # skip header
|
||||
|
||||
# Initialize missing Exp dirs
|
||||
for study in "${STUDIES[@]}"; do
|
||||
read -r _ _ dir <<< "$study"
|
||||
[[ -d $dir ]] || execute mkdir "$dir"
|
||||
done
|
||||
|
||||
((DEBUG)) && declare -p STUDIES
|
||||
|
||||
# Return true if at least one study was found
|
||||
[[ ${#STUDIES[@]} -gt 0 ]]
|
||||
}
|
||||
125
workflow/apps/python/join_interactions.py
Normal file
125
workflow/apps/python/join_interactions.py
Normal file
@@ -0,0 +1,125 @@
|
||||
import pandas as pd
|
||||
import os
|
||||
import sys
|
||||
import numpy as np
|
||||
|
||||
# Function to parse and set arguments
|
||||
def parse_arguments():
|
||||
if len(sys.argv) == 1: # Interactive mode
|
||||
args = [
|
||||
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD",
|
||||
2,
|
||||
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/StudyInfo.csv",
|
||||
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/Exp1",
|
||||
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/Exp2"
|
||||
]
|
||||
else:
|
||||
args = sys.argv[1:]
|
||||
|
||||
return {
|
||||
"out_dir": os.path.abspath(args[0]),
|
||||
"sd": float(args[1]),
|
||||
"study_info": os.path.abspath(args[2]),
|
||||
"input_dirs": args[3:]
|
||||
}
|
||||
|
||||
args = parse_arguments()
|
||||
|
||||
# Create an array for the zscores files
|
||||
def get_zscores_files(dirs):
|
||||
files = [os.path.join(study, "zscores", "zscores_interaction.csv")
|
||||
for study in dirs if os.path.exists(os.path.join(study, "zscores", "zscores_interaction.csv"))]
|
||||
return files
|
||||
|
||||
zscores_files = get_zscores_files(args['input_dirs'])
|
||||
print(f"The SD value is: {args['sd']}")
|
||||
|
||||
# Ensure there are enough files to compare
|
||||
if len(zscores_files) < 2:
|
||||
sys.exit("Not enough experiments to compare, exiting script")
|
||||
|
||||
# Function to join zscores files
|
||||
def join_zscores_files(files):
|
||||
joined_data = pd.read_csv(files[0])
|
||||
for file in files[1:]:
|
||||
temp_data = pd.read_csv(file)
|
||||
joined_data = pd.merge(joined_data, temp_data, on="OrfRep", how="outer")
|
||||
return joined_data
|
||||
|
||||
# Load and join zscores files
|
||||
joined_data = join_zscores_files(zscores_files)
|
||||
|
||||
# Order and select columns
|
||||
def order_and_select_columns(data):
|
||||
ordered_data = data[sorted(data.columns)]
|
||||
selected_headers = ordered_data.filter(regex="OrfRep|Gene|z_lm_k|z_shift_k|z_lm_l|z_shift_l")
|
||||
return selected_headers
|
||||
|
||||
selected_headers = order_and_select_columns(joined_data)
|
||||
|
||||
# Remove redundant columns like "Gene.1"
|
||||
def clean_headers(data, suffixes):
|
||||
suffixes_to_remove = [f"Gene.{i}" for i in range(1, suffixes+1)]
|
||||
return data.drop(columns=suffixes_to_remove, errors='ignore')
|
||||
|
||||
headSel = clean_headers(selected_headers, len(zscores_files) - 1)
|
||||
headSel2 = clean_headers(joined_data.filter(regex="OrfRep|Gene"), len(zscores_files) - 1)
|
||||
|
||||
# Fill NA values in Shift and Z_lm columns
|
||||
def fill_na_in_columns(data):
|
||||
for column in data.columns:
|
||||
if "Shift" in column:
|
||||
data[column].fillna(0.001, inplace=True)
|
||||
elif "Z_lm_" in column:
|
||||
data[column].fillna(0.0001, inplace=True)
|
||||
return data
|
||||
|
||||
headSel = fill_na_in_columns(headSel)
|
||||
|
||||
# Filter based on standard deviation
|
||||
def filter_by_sd(data, sd):
|
||||
if sd == 0:
|
||||
return data
|
||||
z_lm_cols = data.filter(regex="z_lm_")
|
||||
filter_vector = z_lm_cols.abs().ge(sd).any(axis=1)
|
||||
return data[filter_vector]
|
||||
|
||||
REMcRdy = filter_by_sd(headSel.filter(regex="OrfRep|Gene|z_lm_"), args['sd'])
|
||||
shiftOnly = filter_by_sd(headSel.filter(regex="OrfRep|Gene|z_shift"), args['sd'])
|
||||
|
||||
# Reorder columns to interleave Z_lm and Shift data
|
||||
def reorder_columns(data1, data2):
|
||||
combined_data = data1.copy()
|
||||
for i in range(2, data1.shape[1]):
|
||||
combined_data.insert(2 * i - 1, data2.columns[i], data2.iloc[:, i])
|
||||
return combined_data
|
||||
|
||||
combI = reorder_columns(headSel2, shiftOnly)
|
||||
|
||||
# Write output files
|
||||
REMcRdy.to_csv(os.path.join(args['out_dir'], "REMcRdy_lm_only.csv"), index=False, quotechar=False)
|
||||
shiftOnly.to_csv(os.path.join(args['out_dir'], "Shift_only.csv"), index=False, quotechar=False)
|
||||
|
||||
# Relabel headers using experiment names from StudyInfo.csv
|
||||
def relabel_headers(headers, labels):
|
||||
new_labels = headers.copy()
|
||||
for i, header in enumerate(headers):
|
||||
suffix = header.split('.')[-1]
|
||||
if suffix.isdigit() and int(suffix) in range(1, 4):
|
||||
exp_name = labels.iloc[int(suffix) - 1, 1]
|
||||
new_labels[i] = header.replace(f".{suffix}", f"_{exp_name}")
|
||||
return new_labels
|
||||
|
||||
LabelStd = pd.read_csv(args['study_info'])
|
||||
|
||||
shiftOnly.columns = relabel_headers(shiftOnly.columns, LabelStd)
|
||||
REMcRdy.columns = relabel_headers(REMcRdy.columns, LabelStd)
|
||||
|
||||
# Save relabeled files
|
||||
REMcRdy.to_csv(os.path.join(args['out_dir'], "REMcRdy_lm_only.csv"), index=False, quotechar=False)
|
||||
shiftOnly.to_csv(os.path.join(args['out_dir'], "Shift_only.csv"), index=False, quotechar=False)
|
||||
|
||||
# Save updated parameters
|
||||
LabelStd.iloc[:, 3] = args['sd']
|
||||
LabelStd.to_csv(os.path.join(args['out_dir'], "parameters.csv"), index=False)
|
||||
LabelStd.to_csv(args['study_info'], index=False)
|
||||
@@ -1,12 +1,4 @@
|
||||
#!/usr/bin/env Rscript
|
||||
# This R script performs GTA L and K Pairwise Compares for user specified pairs of Experiments
|
||||
#
|
||||
# Updated 240724 Bryan C Roessler to improve file operations and portability
|
||||
# NOTE: The two required arguments are the same and now there are two optional arguments
|
||||
# 1. Exp1
|
||||
# 2. Exp2
|
||||
# 3. StudyInfo.csv file
|
||||
# 4. Output Directory
|
||||
|
||||
library("ggplot2")
|
||||
library("plotly")
|
||||
@@ -16,31 +8,14 @@ library("grid")
|
||||
library("ggthemes")
|
||||
|
||||
args <- commandArgs(TRUE)
|
||||
exp_name <- args[1]
|
||||
exp_name2 <- args[2]
|
||||
exp1_name <- args[1]
|
||||
exp1_file <- args[2]
|
||||
exp2_name <- args[3]
|
||||
exp2_file <- args[4]
|
||||
output_dir <- args[5]
|
||||
|
||||
if (length(args) >= 3) {
|
||||
study_info_file <- args[3]
|
||||
} else {
|
||||
study_info_file <- "StudyInfo.csv"
|
||||
}
|
||||
|
||||
if (length(args) >= 4) {
|
||||
output_dir <- args[4]
|
||||
} else {
|
||||
output_dir <- "gta"
|
||||
}
|
||||
|
||||
expNumber1 <- as.numeric(sub("^.*?(\\d+)$", "\\1", exp_name))
|
||||
expNumber2 <- as.numeric(sub("^.*?(\\d+)$", "\\1", exp_name2))
|
||||
Labels <- read.csv(file = study_info_file, stringsAsFactors = FALSE)
|
||||
Name1 <- Labels[expNumber1, 2]
|
||||
Name2 <- Labels[expNumber2, 2]
|
||||
go_terms_file <- "Average_GOTerms_All.csv"
|
||||
input_file1 <- file.path(output_dir, exp_name, go_terms_file)
|
||||
input_file2 <- file.path(output_dir, exp_name2, go_terms_file)
|
||||
pairDirL <- file.path(output_dir, paste0("PairwiseCompareL_", exp_name, "-", exp_name2))
|
||||
pairDirK <- file.path(output_dir, paste0("PairwiseCompareK_", exp_name, "-", exp_name2))
|
||||
pairDirL <- file.path(output_dir, paste0("PairwiseCompareL_", exp1_name, "-", exp2_name))
|
||||
pairDirK <- file.path(output_dir, paste0("PairwiseCompareK_", exp1_name, "-", exp2_name))
|
||||
|
||||
# Pairwise L
|
||||
# outputPlotly <- "../GTAresults/PairwiseCompareL/" #"/GTAresults/PairwiseCompareL/"
|
||||
@@ -75,21 +50,6 @@ theme_Publication <- function(base_size = 14, base_family = "sans") {
|
||||
)
|
||||
}
|
||||
|
||||
scale_fill_Publication <- function(...) {
|
||||
library(scales)
|
||||
discrete_scale("fill", "Publication", manual_pal(
|
||||
values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
|
||||
"#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...
|
||||
)
|
||||
}
|
||||
|
||||
scale_colour_Publication <- function(...) {
|
||||
discrete_scale("colour", "Publication", manual_pal(
|
||||
values = c("#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
|
||||
"#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...
|
||||
)
|
||||
}
|
||||
|
||||
theme_Publication_legend_right <- function(base_size = 14, base_family = "sans") {
|
||||
(theme_foundation(base_size = base_size, base_family = base_family) +
|
||||
theme(
|
||||
@@ -131,8 +91,8 @@ scale_colour_Publication <- function(...) {
|
||||
"#a6cee3", "#fb9a99", "#984ea3", "#ffff33")), ...)
|
||||
}
|
||||
|
||||
X1 <- read.csv(file = input_file1, stringsAsFactors = FALSE, header = TRUE)
|
||||
X2 <- read.csv(file = input_file2, stringsAsFactors = FALSE, header = TRUE)
|
||||
X1 <- read.csv(file = exp1_file, stringsAsFactors = FALSE, header = TRUE)
|
||||
X2 <- read.csv(file = exp2_file, stringsAsFactors = FALSE, header = TRUE)
|
||||
|
||||
X <- merge(X1, X2, by = "Term_Avg", all = TRUE, suffixes = c("_X1", "_X2"))
|
||||
gg <- ggplot(data = X, aes(
|
||||
@@ -146,7 +106,7 @@ gg <- ggplot(data = X, aes(
|
||||
SD_1 = Z_lm_L_SD_X1,
|
||||
SD_2 = Z_lm_L_SD_X2
|
||||
)) +
|
||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
||||
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||
geom_rect(
|
||||
aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2),
|
||||
color = "grey20",
|
||||
@@ -156,12 +116,12 @@ gg <- ggplot(data = X, aes(
|
||||
fill = NA
|
||||
) +
|
||||
geom_point(shape = 3) +
|
||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, " vs. ", Name2) +
|
||||
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, " vs. ", exp2_name)) +
|
||||
theme_Publication_legend_right()
|
||||
|
||||
pdf(
|
||||
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOntology.pdf")),
|
||||
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOntology.pdf")),
|
||||
width = 12,
|
||||
height = 8
|
||||
)
|
||||
@@ -169,7 +129,7 @@ pdf(
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_byOntology.html"))
|
||||
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOntology.html"))
|
||||
htmlwidgets::saveWidget(pgg, fname)
|
||||
|
||||
# ID aggravators and alleviators, regardless of whether they meet 2SD threshold
|
||||
@@ -184,19 +144,19 @@ X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_L_Avg_X2 <= -2 & X$Z_lm
|
||||
X$Overlap_Avg <- NA
|
||||
|
||||
try(X[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <-
|
||||
paste(Name1, "Specific_Deletion_Enhancers", sep = "_"))
|
||||
paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"))
|
||||
try(X[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <-
|
||||
paste(Name1, "Specific_Deletion_Suppresors", sep = "_"))
|
||||
paste(exp1_name, "Specific_Deletion_Suppresors", sep = "_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <-
|
||||
paste(Name2, "Specific_Deletion_Enhancers", sep = "_"))
|
||||
paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <-
|
||||
paste(Name2, "Specific_Deletion_Suppressors", sep = "_"))
|
||||
paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"))
|
||||
try(X[X$Term_Avg %in% Overlap_Aggravators$Term_Avg, ]$Overlap_Avg <- "Overlapping_Deletion_Enhancers")
|
||||
try(X[X$Term_Avg %in% Overlap_Alleviators$Term_Avg, ]$Overlap_Avg <- "Overlapping_Deletion_Suppressors")
|
||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg, ]$Overlap_Avg <-
|
||||
paste(Name2, "Deletion_Enhancers", Name1, "Deletion_Suppressors", sep = "_"))
|
||||
paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg, ]$Overlap_Avg <-
|
||||
paste(Name2, "Deletion_Suppressors", Name1, "Deletion_Enhancers", sep = "_"))
|
||||
paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_"))
|
||||
|
||||
gg <- ggplot(data = X, aes(
|
||||
x = Z_lm_L_Avg_X1,
|
||||
@@ -209,7 +169,7 @@ gg <- ggplot(data = X, aes(
|
||||
SD_1 = Z_lm_L_SD_X1,
|
||||
SD_2 = Z_lm_L_SD_X2
|
||||
)) +
|
||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
||||
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||
geom_rect(
|
||||
aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2),
|
||||
color = "grey20",
|
||||
@@ -219,12 +179,12 @@ gg <- ggplot(data = X, aes(
|
||||
fill = NA
|
||||
) +
|
||||
geom_point(shape = 3) +
|
||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, " vs. ", Name2)) +
|
||||
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, " vs. ", exp2_name)) +
|
||||
theme_Publication_legend_right()
|
||||
|
||||
pdf(
|
||||
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap.pdf")),
|
||||
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap.pdf")),
|
||||
width = 12,
|
||||
height = 8
|
||||
)
|
||||
@@ -232,7 +192,7 @@ pdf(
|
||||
gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_byOverlap.html"))
|
||||
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap.html"))
|
||||
htmlwidgets::saveWidget(pgg, fname)
|
||||
|
||||
x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2, ]
|
||||
@@ -249,15 +209,15 @@ gg <- ggplot(data = x_rem2_gene, aes(
|
||||
SD_1 = Z_lm_L_SD_X1,
|
||||
SD_2 = Z_lm_L_SD_X2
|
||||
)) +
|
||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
||||
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
||||
geom_point(shape = 3) +
|
||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
|
||||
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
|
||||
theme_Publication_legend_right()
|
||||
|
||||
pdf(
|
||||
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_above2genes.pdf")),
|
||||
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_above2genes.pdf")),
|
||||
width = 12,
|
||||
height = 8
|
||||
)
|
||||
@@ -266,7 +226,7 @@ gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_byOverlap_above2genes.html"))
|
||||
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap_above2genes.html"))
|
||||
htmlwidgets::saveWidget(pgg, fname)
|
||||
|
||||
#4
|
||||
@@ -282,15 +242,15 @@ gg <- ggplot(data = X_overlap_nothresold, aes(
|
||||
SD_1 = Z_lm_L_SD_X1,
|
||||
SD_2 = Z_lm_L_SD_X2
|
||||
)) +
|
||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
||||
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
||||
geom_point(shape = 3) +
|
||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
|
||||
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
|
||||
theme_Publication_legend_right()
|
||||
|
||||
pdf(
|
||||
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_Above2SD_ByOverlap.pdf")),
|
||||
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_Above2SD_ByOverlap.pdf")),
|
||||
width = 12,
|
||||
height = 8
|
||||
)
|
||||
@@ -299,7 +259,7 @@ gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_Above2SD_ByOverlap.html"))
|
||||
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_Above2SD_ByOverlap.html"))
|
||||
htmlwidgets::saveWidget(pgg, fname)
|
||||
|
||||
# Only output GTA terms where average score is still above 2 after subtracting the SD
|
||||
@@ -320,16 +280,16 @@ X2_Specific_Aggravators2_X1_Alleviatiors2 <- Z1[which(Z1$L_Subtract_SD_X2 >= 2 &
|
||||
X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 & Z1$L_Subtract_SD_X1 >= 2), ]
|
||||
X$Overlap <- NA
|
||||
|
||||
try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg, ]$Overlap <- paste(Name1, "Specific_Deletion_Enhancers", sep = "_"))
|
||||
try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg, ]$Overlap <- paste(Name1, "Specific_Deletion_Suppresors", sep = "_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg, ]$Overlap <- paste(Name2, "Specific_Deletion_Enhancers", sep = "_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg, ]$Overlap <- paste(Name2, "Specific_Deletion_Suppressors", sep = "_"))
|
||||
try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg, ]$Overlap <- paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"))
|
||||
try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg, ]$Overlap <- paste(exp1_name, "Specific_Deletion_Suppresors", sep = "_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg, ]$Overlap <- paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg, ]$Overlap <- paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"))
|
||||
try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg, ]$Overlap <- "Overlapping_Deletion_Enhancers")
|
||||
try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg, ]$Overlap <- "Overlapping_Deletion_Suppressors")
|
||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg, ]$Overlap <-
|
||||
paste(Name2, "Deletion_Enhancers", Name1, "Deletion_Suppressors", sep = "_"))
|
||||
paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg, ]$Overlap <-
|
||||
paste(Name2, "Deletion_Suppressors", Name1, "Deletion_Enhancers", sep = "_"))
|
||||
paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_"))
|
||||
|
||||
#5
|
||||
X_abovethreshold <- X[!(is.na(X$Overlap)), ]
|
||||
@@ -345,15 +305,15 @@ gg <- ggplot(data = X_abovethreshold, aes(
|
||||
SD_1 = Z_lm_L_SD_X1,
|
||||
SD_2 = Z_lm_L_SD_X2
|
||||
)) +
|
||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
||||
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
||||
geom_point(shape = 3) +
|
||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, " vs. ", Name2)) +
|
||||
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, " vs. ", exp2_name)) +
|
||||
theme_Publication_legend_right()
|
||||
|
||||
pdf(
|
||||
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold.pdf")),
|
||||
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold.pdf")),
|
||||
width = 12,
|
||||
height = 8
|
||||
)
|
||||
@@ -362,7 +322,7 @@ gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold.html"))
|
||||
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold.html"))
|
||||
htmlwidgets::saveWidget(pgg, fname)
|
||||
|
||||
#6
|
||||
@@ -377,16 +337,16 @@ gg <- ggplot(data = X_abovethreshold, aes(
|
||||
SD_1 = Z_lm_L_SD_X1,
|
||||
SD_2 = Z_lm_L_SD_X2
|
||||
)) +
|
||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
||||
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||
geom_text(aes(label = Term_Avg), nudge_y = 0.25, size = 2) +
|
||||
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
||||
geom_point(shape = 3, size = 3) +
|
||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
|
||||
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
|
||||
theme_Publication_legend_right()
|
||||
|
||||
pdf(
|
||||
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_names.pdf")),
|
||||
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_names.pdf")),
|
||||
width = 20,
|
||||
height = 20
|
||||
)
|
||||
@@ -395,7 +355,7 @@ gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_names.html"))
|
||||
fname <- file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_names.html"))
|
||||
htmlwidgets::saveWidget(pgg, fname)
|
||||
|
||||
X_abovethreshold$X1_Rank <- NA
|
||||
@@ -415,16 +375,16 @@ gg <- ggplot(data = X_abovethreshold, aes(
|
||||
SD_1 = Z_lm_L_SD_X1,
|
||||
SD_2 = Z_lm_L_SD_X2
|
||||
)) +
|
||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
||||
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||
geom_text(aes(label = X1_Rank), nudge_y = 0.25, size = 4) +
|
||||
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
||||
geom_point(shape = 3, size = 3) +
|
||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
|
||||
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
|
||||
theme_Publication_legend_right()
|
||||
|
||||
pdf(
|
||||
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX1.pdf")),
|
||||
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX1.pdf")),
|
||||
width = 15,
|
||||
height = 15
|
||||
)
|
||||
@@ -435,7 +395,7 @@ pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
|
||||
fname <-
|
||||
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX1.html"))
|
||||
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX1.html"))
|
||||
htmlwidgets::saveWidget(pgg, fname)
|
||||
|
||||
#8
|
||||
@@ -450,16 +410,16 @@ gg <- ggplot(data = X_abovethreshold, aes(
|
||||
SD_1 = Z_lm_L_SD_X1,
|
||||
SD_2 = Z_lm_L_SD_X2
|
||||
)) +
|
||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
||||
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||
geom_text(aes(label = X2_Rank), nudge_y = 0.25, size = 4) +
|
||||
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
||||
geom_point(shape = 3, size = 3) +
|
||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
|
||||
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
|
||||
theme_Publication_legend_right()
|
||||
|
||||
pdf(
|
||||
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX2.pdf")),
|
||||
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX2.pdf")),
|
||||
width = 15,
|
||||
height = 15
|
||||
)
|
||||
@@ -469,18 +429,18 @@ dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
fname <-
|
||||
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX2.html"))
|
||||
file.path(pairDirL, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX2.html"))
|
||||
htmlwidgets::saveWidget(pgg, fname)
|
||||
|
||||
write.csv(
|
||||
x = X,
|
||||
file.path(pairDirL, paste0("All_GTA_Avg_Scores_", Name1, "_vs_", Name2, ".csv")),
|
||||
file.path(pairDirL, paste0("All_GTA_Avg_Scores_", exp1_name, "_vs_", exp2_name, ".csv")),
|
||||
row.names = FALSE
|
||||
)
|
||||
|
||||
write.csv(
|
||||
x = X_abovethreshold,
|
||||
file = file.path(pairDirL, paste0("AboveThreshold_GTA_Avg_Scores_", Name1, "_vs_", Name2, ".csv")),
|
||||
file = file.path(pairDirL, paste0("AboveThreshold_GTA_Avg_Scores_", exp1_name, "_vs_", exp2_name, ".csv")),
|
||||
row.names = FALSE
|
||||
)
|
||||
|
||||
@@ -582,8 +542,8 @@ scale_colour_Publication <- function(...) {
|
||||
)
|
||||
}
|
||||
|
||||
X1 <- read.csv(file = input_file1, stringsAsFactors = FALSE, header = TRUE)
|
||||
X2 <- read.csv(file = input_file2, stringsAsFactors = FALSE, header = TRUE)
|
||||
X1 <- read.csv(file = exp1_file, stringsAsFactors = FALSE, header = TRUE)
|
||||
X2 <- read.csv(file = exp2_file, stringsAsFactors = FALSE, header = TRUE)
|
||||
|
||||
#1
|
||||
X <- merge(X1, X2, by = "Term_Avg", all = TRUE, suffixes = c("_X1", "_X2"))
|
||||
@@ -599,15 +559,15 @@ gg <- ggplot(data = X, aes(
|
||||
SD_1 = Z_lm_K_SD_X1,
|
||||
SD_2 = Z_lm_K_SD_X2
|
||||
)) +
|
||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
||||
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
||||
geom_point(shape = 3) +
|
||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
|
||||
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
|
||||
theme_Publication_legend_right()
|
||||
|
||||
pdf(
|
||||
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOntology.pdf")),
|
||||
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOntology.pdf")),
|
||||
width = 12,
|
||||
height = 8
|
||||
)
|
||||
@@ -616,7 +576,7 @@ gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_byOntology.html"))
|
||||
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOntology.html"))
|
||||
htmlwidgets::saveWidget(pgg, fname)
|
||||
|
||||
#2
|
||||
@@ -632,23 +592,23 @@ X2_Specific_Alleviators_X1_Aggravators <- X[which(X$Z_lm_K_Avg_X2 <= -2 & X$Z_lm
|
||||
X$Overlap_Avg <- NA
|
||||
|
||||
try(X[X$Term_Avg %in% X1_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <-
|
||||
paste(Name1, "Specific_Deletion_Suppressors", sep = "_"))
|
||||
paste(exp1_name, "Specific_Deletion_Suppressors", sep = "_"))
|
||||
try(X[X$Term_Avg %in% X1_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <-
|
||||
paste(Name1, "Specific_Deletion_Enhancers", sep = "_"))
|
||||
paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators$Term_Avg, ]$Overlap_Avg <-
|
||||
paste(Name2, "Specific_Deletion_Suppressors", sep = "_"))
|
||||
paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators$Term_Avg, ]$Overlap_Avg <-
|
||||
paste(Name2, "Specific_Deletion_Enhancers", sep = "_"))
|
||||
paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"))
|
||||
try(X[X$Term_Avg %in% Overlap_Aggravators$Term_Avg, ]$Overlap_Avg <-
|
||||
"Overlapping_Deletion_Suppressors")
|
||||
try(X[X$Term_Avg %in% Overlap_Alleviators$Term_Avg, ]$Overlap_Avg <-
|
||||
"Overlapping_Deletion_Enhancers")
|
||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators_X1_Alleviatiors$Term_Avg, ]$Overlap_Avg <-
|
||||
paste(Name2, "Deletion_Suppressors", Name1, "Deletion_Enhancers", sep = "_"))
|
||||
paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators_X1_Aggravators$Term_Avg, ]$Overlap_Avg <-
|
||||
paste(Name2, "Deletion_Enhancers", Name1, "Deletion_Suppressors", sep = "_"))
|
||||
paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"))
|
||||
|
||||
plotly_path <- file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_byOverlap.html"))
|
||||
plotly_path <- file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap.html"))
|
||||
gg <- ggplot(data = X, aes(
|
||||
x = Z_lm_K_Avg_X1,
|
||||
y = Z_lm_K_Avg_X2,
|
||||
@@ -660,15 +620,15 @@ gg <- ggplot(data = X, aes(
|
||||
SD_1 = Z_lm_K_SD_X1,
|
||||
SD_2 = Z_lm_K_SD_X2
|
||||
)) +
|
||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
||||
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
||||
geom_point(shape = 3) +
|
||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
|
||||
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
|
||||
theme_Publication_legend_right()
|
||||
|
||||
pdf(
|
||||
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap.pdf")),
|
||||
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap.pdf")),
|
||||
width = 12,
|
||||
height = 8
|
||||
)
|
||||
@@ -678,12 +638,12 @@ dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
|
||||
#2
|
||||
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_byOverlap.html"))
|
||||
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap.html"))
|
||||
htmlwidgets::saveWidget(pgg, fname)
|
||||
|
||||
#3
|
||||
x_rem2_gene <- X[X$NumGenes_Avg_X1 >= 2 & X$NumGenes_Avg_X2 >= 2, ]
|
||||
plotly_path <- file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_byOverlap_above2genes.html"))
|
||||
plotly_path <- file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap_above2genes.html"))
|
||||
gg <- ggplot(data = x_rem2_gene, aes(
|
||||
x = Z_lm_K_Avg_X1,
|
||||
y = Z_lm_K_Avg_X2,
|
||||
@@ -695,15 +655,15 @@ gg <- ggplot(data = x_rem2_gene, aes(
|
||||
SD_1 = Z_lm_K_SD_X1,
|
||||
SD_2 = Z_lm_K_SD_X2
|
||||
)) +
|
||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
||||
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
||||
geom_point(shape = 3) +
|
||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
|
||||
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
|
||||
theme_Publication_legend_right()
|
||||
|
||||
pdf(
|
||||
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_above2genes.pdf")),
|
||||
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_above2genes.pdf")),
|
||||
width = 12,
|
||||
height = 8
|
||||
)
|
||||
@@ -712,7 +672,7 @@ gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_byOverlap_above2genes.html"))
|
||||
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_byOverlap_above2genes.html"))
|
||||
htmlwidgets::saveWidget(pgg, fname)
|
||||
|
||||
#4
|
||||
@@ -728,15 +688,15 @@ gg <- ggplot(data = X_overlap_nothresold, aes(
|
||||
SD_1 = Z_lm_K_SD_X1,
|
||||
SD_2 = Z_lm_K_SD_X2
|
||||
)) +
|
||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
||||
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
||||
geom_point(shape = 3) +
|
||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
|
||||
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
|
||||
theme_Publication_legend_right()
|
||||
|
||||
pdf(
|
||||
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_Above2SD_ByOverlap.pdf")),
|
||||
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_Above2SD_ByOverlap.pdf")),
|
||||
width = 12,
|
||||
height = 8
|
||||
)
|
||||
@@ -746,7 +706,7 @@ dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
|
||||
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_Above2SD_ByOverlap.html"))
|
||||
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_Above2SD_ByOverlap.html"))
|
||||
htmlwidgets::saveWidget(pgg, fname)
|
||||
|
||||
#5
|
||||
@@ -769,21 +729,21 @@ X2_Specific_Alleviators2_X1_Aggravators2 <- Z2[which(Z2$L_Subtract_SD_X2 <= -2 &
|
||||
X$Overlap <- NA
|
||||
|
||||
try(X[X$Term_Avg %in% X1_Specific_Aggravators2$Term_Avg, ]$Overlap <-
|
||||
paste(Name1, "Specific_Deletion_Suppressors", sep = "_"))
|
||||
paste(exp1_name, "Specific_Deletion_Suppressors", sep = "_"))
|
||||
try(X[X$Term_Avg %in% X1_Specific_Alleviators2$Term_Avg, ]$Overlap <-
|
||||
paste(Name1, "Specific_Deletion_Enhancers", sep = "_"))
|
||||
paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators2$Term_Avg, ]$Overlap <-
|
||||
paste(Name2, "Specific_Deletion_Suppressors", sep = "_"))
|
||||
paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators2$Term_Avg, ]$Overlap <-
|
||||
paste(Name2, "Specific_Deletion_Enhancers", sep = "_"))
|
||||
paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"))
|
||||
try(X[X$Term_Avg %in% Overlap_Aggravators2$Term_Avg, ]$Overlap <-
|
||||
"Overlapping_Deletion_Suppressors")
|
||||
try(X[X$Term_Avg %in% Overlap_Alleviators2$Term_Avg, ]$Overlap <-
|
||||
"Overlapping_Deletion_Enhancers")
|
||||
try(X[X$Term_Avg %in% X2_Specific_Aggravators2_X1_Alleviatiors2$Term_Avg, ]$Overlap <-
|
||||
paste(Name2, "Deletion_Suppressors", Name1, "Deletion_Enhancers", sep = "_"))
|
||||
paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_"))
|
||||
try(X[X$Term_Avg %in% X2_Specific_Alleviators2_X1_Aggravators2$Term_Avg, ]$Overlap <-
|
||||
paste(Name2, "Deletion_Enhancers", Name1, "Deletion_Suppressors", sep = "_"))
|
||||
paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"))
|
||||
|
||||
X_abovethreshold <- X[!(is.na(X$Overlap)), ]
|
||||
gg <- ggplot(data = X_abovethreshold, aes(
|
||||
@@ -797,15 +757,15 @@ gg <- ggplot(data = X_abovethreshold, aes(
|
||||
SD_1 = Z_lm_K_SD_X1,
|
||||
SD_2 = Z_lm_K_SD_X2
|
||||
)) +
|
||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
||||
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
||||
geom_point(shape = 3) +
|
||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
|
||||
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
|
||||
theme_Publication_legend_right()
|
||||
|
||||
pdf(
|
||||
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold.pdf")),
|
||||
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold.pdf")),
|
||||
width = 12,
|
||||
height = 8
|
||||
)
|
||||
@@ -814,7 +774,7 @@ gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold.html"))
|
||||
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold.html"))
|
||||
htmlwidgets::saveWidget(pgg, fname)
|
||||
|
||||
#6
|
||||
@@ -829,16 +789,16 @@ gg <- ggplot(data = X_abovethreshold, aes(
|
||||
SD_1 = Z_lm_K_SD_X1,
|
||||
SD_2 = Z_lm_K_SD_X2
|
||||
)) +
|
||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
||||
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||
geom_text(aes(label = Term_Avg), nudge_y = 0.25, size = 2) +
|
||||
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
||||
geom_point(shape = 3, size = 3) +
|
||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, " vs. ", Name2)) +
|
||||
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, " vs. ", exp2_name)) +
|
||||
theme_Publication_legend_right()
|
||||
|
||||
pdf(
|
||||
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_names.pdf")),
|
||||
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_names.pdf")),
|
||||
width = 20,
|
||||
height = 20
|
||||
)
|
||||
@@ -846,7 +806,7 @@ gg
|
||||
dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_names.html"))
|
||||
fname <- file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_names.html"))
|
||||
htmlwidgets::saveWidget(pgg, fname)
|
||||
|
||||
#7
|
||||
@@ -866,16 +826,16 @@ gg <- ggplot(data = X_abovethreshold, aes(
|
||||
SD_1 = Z_lm_K_SD_X1,
|
||||
SD_2 = Z_lm_K_SD_X2
|
||||
)) +
|
||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
||||
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||
geom_text(aes(label = X1_Rank), nudge_y = 0.25, size = 4) +
|
||||
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
||||
geom_point(shape = 3, size = 3) +
|
||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
|
||||
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
|
||||
theme_Publication_legend_right()
|
||||
|
||||
pdf(
|
||||
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX1.pdf")),
|
||||
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX1.pdf")),
|
||||
width = 15,
|
||||
height = 15
|
||||
)
|
||||
@@ -885,7 +845,7 @@ dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
fname <-
|
||||
file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX1.html"))
|
||||
file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX1.html"))
|
||||
htmlwidgets::saveWidget(pgg, fname)
|
||||
|
||||
#8
|
||||
@@ -900,16 +860,16 @@ gg <- ggplot(data = X_abovethreshold, aes(
|
||||
SD_1 = Z_lm_K_SD_X1,
|
||||
SD_2 = Z_lm_K_SD_X2
|
||||
)) +
|
||||
xlab(paste("GO Term Avg lm Z for", Name1)) +
|
||||
xlab(paste("GO Term Avg lm Z for", exp1_name)) +
|
||||
geom_text(aes(label = X2_Rank), nudge_y = 0.25, size = 4) +
|
||||
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
||||
geom_point(shape = 3, size = 3) +
|
||||
ylab(paste("GO Term Avg lm Z for", Name2)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", Name1, "vs.", Name2)) +
|
||||
ylab(paste("GO Term Avg lm Z for", exp2_name)) +
|
||||
ggtitle(paste("Comparing Average GO Term Z lm for", exp1_name, "vs.", exp2_name)) +
|
||||
theme_Publication_legend_right()
|
||||
|
||||
pdf(
|
||||
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX2.pdf")),
|
||||
file.path(pairDirK, paste0("Scatter_lm_GTF_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX2.pdf")),
|
||||
width = 15,
|
||||
height = 15
|
||||
)
|
||||
@@ -919,17 +879,17 @@ dev.off()
|
||||
pgg <- ggplotly(gg)
|
||||
#pgg
|
||||
fname <-
|
||||
file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", Name1, "_vs_", Name2, "_All_ByOverlap_AboveThreshold_numberedX2.html"))
|
||||
file.path(pairDirK, paste0("Scatter_lm_GTA_Averages_", exp1_name, "_vs_", exp2_name, "_All_ByOverlap_AboveThreshold_numberedX2.html"))
|
||||
htmlwidgets::saveWidget(pgg, fname)
|
||||
|
||||
write.csv(
|
||||
x = X,
|
||||
file = file.path(pairDirK, paste0("All_GTF_Avg_Scores_", Name1, "_vs_", Name2, ".csv")),
|
||||
file = file.path(pairDirK, paste0("All_GTF_Avg_Scores_", exp1_name, "_vs_", exp2_name, ".csv")),
|
||||
row.names = FALSE
|
||||
)
|
||||
|
||||
write.csv(
|
||||
x = X_abovethreshold,
|
||||
file = file.path(pairDirK, paste0("AboveThreshold_GTF_Avg_Scores_", Name1, "_vs_", Name2, ".csv")),
|
||||
file = file.path(pairDirK, paste0("AboveThreshold_GTF_Avg_Scores_", exp1_name, "_vs_", exp2_name, ".csv")),
|
||||
row.names = FALSE
|
||||
)
|
||||
|
||||
335
workflow/apps/r/calculate_interaction_zscores.R
Normal file
335
workflow/apps/r/calculate_interaction_zscores.R
Normal file
@@ -0,0 +1,335 @@
|
||||
suppressMessages({
|
||||
if (!require("ggplot2")) stop("Package ggplot2 is required but not installed.")
|
||||
if (!require("plotly")) stop("Package plotly is required but not installed.")
|
||||
if (!require("htmlwidgets")) stop("Package htmlwidgets is required but not installed.")
|
||||
if (!require("dplyr")) stop("Package dplyr is required but not installed.")
|
||||
if (!require("ggthemes")) stop("Package ggthemes is required but not installed.")
|
||||
if (!require("plyr")) stop("Package plyr is required but not installed.")
|
||||
})
|
||||
|
||||
# Constants for configuration
|
||||
PLOT_WIDTH <- 14
|
||||
PLOT_HEIGHT <- 9
|
||||
BASE_SIZE <- 14
|
||||
|
||||
options(warn = 2, max.print = 100)
|
||||
|
||||
parse_arguments <- function() {
|
||||
args <- if (interactive()) {
|
||||
c(
|
||||
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240116_jhartman2_DoxoHLD",
|
||||
3,
|
||||
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/apps/r/SGD_features.tab",
|
||||
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/easy/20240116_jhartman2_DoxoHLD/results_std.txt",
|
||||
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp1",
|
||||
"Experiment 1: Doxo versus HLD",
|
||||
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp2",
|
||||
"Experiment 2: HLD versus Doxo",
|
||||
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp3",
|
||||
"Experiment 3: HLD versus WT",
|
||||
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp4",
|
||||
"Experiment 4: Doxo versus WT"
|
||||
)
|
||||
} else {
|
||||
commandArgs(trailingOnly = TRUE)
|
||||
}
|
||||
|
||||
paths <- normalizePath(file.path(args[seq(5, length(args), by = 2)]), mustWork = FALSE)
|
||||
names <- args[seq(6, length(args), by = 2)]
|
||||
experiments <- setNames(paths, names)
|
||||
|
||||
list(
|
||||
out_dir = normalizePath(file.path(args[1]), mustWork = FALSE),
|
||||
sd = as.numeric(args[2]),
|
||||
sgd_gene_list = normalizePath(file.path(args[3]), mustWork = FALSE),
|
||||
easy_results_file = normalizePath(file.path(args[4]), mustWork = FALSE),
|
||||
experiments = experiments
|
||||
)
|
||||
}
|
||||
|
||||
args <- parse_arguments()
|
||||
|
||||
dir.create(args$out_dir, showWarnings = FALSE)
|
||||
|
||||
# Define themes and scales
|
||||
theme_publication <- function(base_size = BASE_SIZE, base_family = "sans") {
|
||||
theme_foundation(base_size = base_size, base_family = base_family) +
|
||||
theme(
|
||||
plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
|
||||
text = element_text(),
|
||||
panel.background = element_rect(colour = NA),
|
||||
plot.background = element_rect(colour = NA),
|
||||
panel.border = element_rect(colour = NA),
|
||||
axis.title = element_text(face = "bold", size = rel(1)),
|
||||
axis.title.y = element_text(angle = 90, vjust = 2),
|
||||
axis.title.x = element_text(vjust = -0.2),
|
||||
axis.line = element_line(colour = "black"),
|
||||
panel.grid.major = element_line(colour = "#f0f0f0"),
|
||||
panel.grid.minor = element_blank(),
|
||||
legend.key = element_rect(colour = NA),
|
||||
legend.position = "bottom",
|
||||
legend.direction = "horizontal",
|
||||
plot.margin = unit(c(10, 5, 5, 5), "mm"),
|
||||
strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
|
||||
strip.text = element_text(face = "bold")
|
||||
)
|
||||
}
|
||||
|
||||
theme_publication_legend_right <- function(base_size = BASE_SIZE, base_family = "sans") {
|
||||
theme_publication(base_size, base_family) +
|
||||
theme(
|
||||
legend.position = "right",
|
||||
legend.direction = "vertical",
|
||||
legend.key.size = unit(0.5, "cm"),
|
||||
legend.spacing = unit(0, "cm"),
|
||||
legend.title = element_text(face = "italic")
|
||||
)
|
||||
}
|
||||
|
||||
scale_fill_publication <- function(...) {
|
||||
discrete_scale("fill", "Publication", manual_pal(values = c(
|
||||
"#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
|
||||
"#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
|
||||
)), ...)
|
||||
}
|
||||
|
||||
scale_colour_publication <- function(...) {
|
||||
discrete_scale("colour", "Publication", manual_pal(values = c(
|
||||
"#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
|
||||
"#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
|
||||
)), ...)
|
||||
}
|
||||
|
||||
# Load SGD gene list
|
||||
sgd_genes <- function(sgd_gene_list) {
|
||||
read.delim(file = sgd_gene_list, quote = "", header = FALSE,
|
||||
colClasses = c(rep("NULL", 3), rep("character", 2), rep("NULL", 11))) %>%
|
||||
dplyr::rename(ORF = V4, GeneName = V5)
|
||||
}
|
||||
|
||||
genes <- sgd_genes(args$sgd_gene_list)
|
||||
|
||||
load_and_preprocess_data <- function(easy_results_file, genes) {
|
||||
df <- tryCatch({
|
||||
read.delim(easy_results_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE)
|
||||
}, error = function(e) {
|
||||
stop("Error reading file: ", easy_results_file, "\n", e$message)
|
||||
}) %>%
|
||||
filter(!.[[1]] %in% c("", "Scan")) # Fixed syntax
|
||||
|
||||
numeric_columns <- c("Col", "Row", "l", "K", "r", "Scan", "AUC96", "LstBackgrd", "X1stBackgrd")
|
||||
df[numeric_columns[numeric_columns %in% colnames(df)]] <-
|
||||
lapply(df[numeric_columns[numeric_columns %in% colnames(df)]], as.numeric)
|
||||
|
||||
df <- df %>%
|
||||
mutate(
|
||||
L = if ("l" %in% colnames(.)) l else {warning("Missing column: l"); NA},
|
||||
AUC = if ("AUC96" %in% colnames(.)) AUC96 else {warning("Missing column: AUC96"); NA},
|
||||
conc_num = if ("Conc" %in% colnames(.)) as.numeric(gsub("[^0-9\\.]", "", Conc)) else NA,
|
||||
delta_bg = if (all(c("X1stBackgrd", "LstBackgrd") %in% colnames(.)))
|
||||
LstBackgrd - X1stBackgrd else {warning("Missing columns for delta_bg calculation"); NA},
|
||||
GeneName = vapply(ORF, function(orf) {
|
||||
gene_name <- genes %>% filter(ORF == orf) %>% pull(GeneName)
|
||||
ifelse(is.null(gene_name) || gene_name == "" || length(gene_name) == 0, orf, gene_name)
|
||||
}, character(1)) # Ensures a character vector is returned
|
||||
)
|
||||
|
||||
if (nrow(df) == 0) warning("Dataframe is empty after filtering")
|
||||
|
||||
return(df)
|
||||
}
|
||||
|
||||
create_and_publish_plot <- function(df, var, plot_type, out_dir_qc, suffix = "") {
|
||||
plot_func <- if (plot_type == "scatter") geom_point else geom_boxplot
|
||||
filtered_df <- filter(df, is.finite(.data[[var]]))
|
||||
|
||||
p <- ggplot(filtered_df, aes(Scan, .data[[var]], color = as.factor(conc_num))) +
|
||||
plot_func(shape = 3, size = 0.2, position = "jitter") +
|
||||
stat_summary(fun = mean, geom = "point", size = 0.6) +
|
||||
stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), geom = "errorbar") +
|
||||
ggtitle(paste("Plate analysis by Drug Conc for", var, "before quality control")) +
|
||||
theme_publication()
|
||||
|
||||
pdf_path <- file.path(out_dir_qc, paste0("plate_analysis_", var, suffix, ".pdf"))
|
||||
pdf(pdf_path, width = PLOT_WIDTH, height = PLOT_HEIGHT)
|
||||
print(p)
|
||||
dev.off()
|
||||
|
||||
html_path <- sub(".pdf$", ".html", pdf_path)
|
||||
pgg <- suppressWarnings(ggplotly(p, tooltip = c("L", "K", "ORF", "Gene", "delta_bg", "GeneName")) %>%
|
||||
layout(legend = list(orientation = "h")))
|
||||
saveWidget(pgg, html_path, selfcontained = TRUE)
|
||||
}
|
||||
|
||||
|
||||
publish_summary_stats <- function(df, variables, out_dir) {
|
||||
stats_list <- lapply(variables, function(var) {
|
||||
df %>%
|
||||
dplyr::group_by(OrfRep, conc_num) %>%
|
||||
dplyr::summarize(
|
||||
N = dplyr::n(), # Ensure that the correct version of n() is used
|
||||
mean_val = mean(.data[[var]], na.rm = TRUE),
|
||||
sd_val = sd(.data[[var]], na.rm = TRUE),
|
||||
se_val = sd_val / sqrt(N)
|
||||
)
|
||||
})
|
||||
summary_stats_df <- dplyr::bind_rows(stats_list, .id = "variable")
|
||||
write.csv(summary_stats_df, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
|
||||
}
|
||||
|
||||
|
||||
publish_interaction_scores <- function(df, out_dir) {
|
||||
interaction_scores <- df %>%
|
||||
group_by(OrfRep) %>%
|
||||
summarize(
|
||||
mean_L = mean(L, na.rm = TRUE),
|
||||
mean_K = mean(K, na.rm = TRUE),
|
||||
mean_r = mean(r, na.rm = TRUE),
|
||||
mean_AUC = mean(AUC, na.rm = TRUE),
|
||||
delta_bg_mean = mean(delta_bg, na.rm = TRUE),
|
||||
delta_bg_sd = sd(delta_bg, na.rm = TRUE)
|
||||
) %>%
|
||||
mutate(
|
||||
l_rank = rank(mean_L),
|
||||
k_rank = rank(mean_K),
|
||||
r_rank = rank(mean_r),
|
||||
auc_rank = rank(mean_AUC)
|
||||
)
|
||||
|
||||
write.csv(interaction_scores, file.path(out_dir, "rf_zscores_interaction.csv"), row.names = FALSE)
|
||||
write.csv(arrange(interaction_scores, l_rank, k_rank),
|
||||
file.path(out_dir, "rf_zscores_interaction_ranked.csv"), row.names = FALSE)
|
||||
}
|
||||
|
||||
publish_zscores <- function(df, out_dir) {
|
||||
zscores <- df %>%
|
||||
mutate(
|
||||
zscore_L = scale(L, center = TRUE, scale = TRUE),
|
||||
zscore_K = scale(K, center = TRUE, scale = TRUE),
|
||||
zscore_r = scale(r, center = TRUE, scale = TRUE),
|
||||
zscore_AUC = scale(AUC, center = TRUE, scale = TRUE)
|
||||
)
|
||||
|
||||
write.csv(zscores, file.path(out_dir, "zscores_interaction.csv"), row.names = FALSE)
|
||||
}
|
||||
|
||||
generate_and_publish_qc <- function(df, delta_bg_tolerance, out_dir_qc) {
|
||||
variables <- c("L", "K", "r", "AUC", "delta_bg")
|
||||
lapply(variables, create_and_publish_plot, df = df, plot_type = "scatter", out_dir_qc = out_dir_qc)
|
||||
delta_bg_above_tolerance <- filter(df, delta_bg >= delta_bg_tolerance)
|
||||
lapply(variables, create_and_publish_plot, df = delta_bg_above_tolerance,
|
||||
plot_type = "scatter", out_dir_qc = out_dir_qc, suffix = "_above_tolerance")
|
||||
}
|
||||
|
||||
process_exp_dir <- function(exp_dir, exp_name, genes, easy_results_file) {
|
||||
out_dir <- file.path(exp_dir, "zscores")
|
||||
out_dir_qc <- file.path(exp_dir, "qc")
|
||||
dir.create(out_dir, showWarnings = FALSE)
|
||||
dir.create(out_dir_qc, showWarnings = FALSE)
|
||||
df <- load_and_preprocess_data(easy_results_file, genes)
|
||||
delta_bg_tolerance <- mean(df$delta_bg, na.rm = TRUE) + 3 * sd(df$delta_bg, na.rm = TRUE)
|
||||
|
||||
generate_and_publish_qc(df, delta_bg_tolerance, out_dir_qc)
|
||||
|
||||
variables <- c("L", "K", "r", "AUC", "delta_bg")
|
||||
publish_summary_stats(df, variables, out_dir)
|
||||
publish_interaction_scores(df, out_dir)
|
||||
publish_zscores(df, out_dir)
|
||||
|
||||
list(
|
||||
zscores_file = file.path(out_dir, "zscores_interaction.csv"),
|
||||
exp_name = exp_name
|
||||
)
|
||||
}
|
||||
|
||||
processed_experiments <- lapply(names(args$experiments), function(exp_name) {
|
||||
process_exp_dir(args$experiments[[exp_name]], exp_name, genes, args$easy_results_file)
|
||||
})
|
||||
|
||||
zscores_files <- sapply(processed_experiments, `[[`, "zscores_file")
|
||||
exp_names <- sapply(processed_experiments, `[[`, "exp_name")
|
||||
|
||||
combine_zscores <- function(zscores_files) {
|
||||
if (length(zscores_files) < 2) stop("Not enough experiments to compare, exiting script")
|
||||
|
||||
joined_data <- read.csv(file = zscores_files[1], stringsAsFactors = FALSE)
|
||||
for (file in zscores_files[-1]) {
|
||||
temp_data <- read.csv(file = file, stringsAsFactors = FALSE)
|
||||
joined_data <- plyr::join(joined_data, temp_data, by = "OrfRep", type = "full")
|
||||
}
|
||||
joined_data
|
||||
}
|
||||
|
||||
process_combined_zscores <- function(joined_data, sd, out_dir, exp_names) {
|
||||
ordered_data <- joined_data %>%
|
||||
select(contains("OrfRep"), matches("Gene"),
|
||||
contains("z_lm_k"), contains("z_shift_k"),
|
||||
contains("z_lm_l"), contains("z_shift_l")) %>%
|
||||
arrange(contains("OrfRep"))
|
||||
|
||||
clean_headers <- function(data, suffixes) {
|
||||
suffixes_to_remove <- paste0("Gene.", seq_len(suffixes))
|
||||
select(data, -all_of(suffixes_to_remove))
|
||||
}
|
||||
|
||||
headSel <- clean_headers(ordered_data, length(zscores_files) - 1)
|
||||
headSel2 <- clean_headers(select(joined_data, contains("OrfRep"), matches("Gene")), length(zscores_files) - 1)
|
||||
|
||||
fill_na_in_columns <- function(data) {
|
||||
for (header in colnames(data)) {
|
||||
if (grepl("Shift", header)) {
|
||||
data[[header]][is.na(data[[header]])] <- 0.001
|
||||
} else if (grepl("Z_lm_", header)) {
|
||||
data[[header]][is.na(data[[header]])] <- 0.0001
|
||||
}
|
||||
}
|
||||
data
|
||||
}
|
||||
|
||||
headSel <- fill_na_in_columns(headSel)
|
||||
|
||||
filter_by_sd <- function(data, sd) {
|
||||
if (sd == 0) return(data)
|
||||
|
||||
z_lm_cols <- select(data, contains("z_lm_"))
|
||||
filter_vector <- rowSums(abs(z_lm_cols) >= sd) > 0
|
||||
data[filter_vector, ]
|
||||
}
|
||||
|
||||
REMcRdy <- filter_by_sd(select(headSel, contains("OrfRep"), matches("Gene"), contains("z_lm_")), sd)
|
||||
shiftOnly <- filter_by_sd(select(headSel, contains("OrfRep"), matches("Gene"), contains("z_shift")), sd)
|
||||
|
||||
reorder_columns <- function(data1, data2) {
|
||||
combined_data <- data1
|
||||
for (i in 3:ncol(data1)) {
|
||||
combined_data <- cbind(combined_data, data2[i], data1[i])
|
||||
}
|
||||
combined_data
|
||||
}
|
||||
|
||||
combI <- reorder_columns(headSel2, shiftOnly)
|
||||
|
||||
write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
|
||||
write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
|
||||
|
||||
relabel_headers <- function(headers, exp_names) {
|
||||
new_labels <- headers
|
||||
for (i in seq_along(headers)) {
|
||||
suffix <- sub("^.*\\.(\\d+)$", "\\1", headers[i])
|
||||
if (suffix %in% seq_along(exp_names)) {
|
||||
exp_name <- exp_names[as.numeric(suffix)]
|
||||
new_labels[i] <- gsub(paste0(".", suffix), paste0("_", exp_name), headers[i])
|
||||
}
|
||||
}
|
||||
new_labels
|
||||
}
|
||||
|
||||
colnames(shiftOnly) <- relabel_headers(colnames(shiftOnly), exp_names)
|
||||
colnames(REMcRdy) <- relabel_headers(colnames(REMcRdy), exp_names)
|
||||
|
||||
write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
|
||||
write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
|
||||
}
|
||||
|
||||
joined_data <- combine_zscores(zscores_files)
|
||||
process_combined_zscores(joined_data, args$sd, args$out_dir, exp_names)
|
||||
209
workflow/apps/r/calculate_pairwise_lk.R
Normal file
209
workflow/apps/r/calculate_pairwise_lk.R
Normal file
@@ -0,0 +1,209 @@
|
||||
suppressMessages({
|
||||
library("ggplot2")
|
||||
library("plotly")
|
||||
library("htmlwidgets")
|
||||
library("extrafont")
|
||||
library("grid")
|
||||
library("ggthemes")
|
||||
})
|
||||
|
||||
# Constants for configuration
|
||||
PLOT_WIDTH <- 12
|
||||
PLOT_HEIGHT <- 8
|
||||
BASE_SIZE <- 14
|
||||
|
||||
options(warn = 2, max.print = 100)
|
||||
|
||||
# Parse arguments
|
||||
parse_arguments <- function() {
|
||||
if (interactive()) {
|
||||
args <- c(
|
||||
"Exp1",
|
||||
"/path/to/exp1_file.csv",
|
||||
"Exp2",
|
||||
"/path/to/exp2_file.csv",
|
||||
"/path/to/output_dir"
|
||||
)
|
||||
} else {
|
||||
args <- commandArgs(trailingOnly = TRUE)
|
||||
}
|
||||
list(
|
||||
exp1_name = args[1],
|
||||
exp1_file = normalizePath(args[2], mustWork = TRUE),
|
||||
exp2_name = args[3],
|
||||
exp2_file = normalizePath(args[4], mustWork = TRUE),
|
||||
output_dir = normalizePath(args[5], mustWork = FALSE)
|
||||
)
|
||||
}
|
||||
|
||||
args <- parse_arguments()
|
||||
|
||||
# Create output directories if they don't exist
|
||||
pairDirL <- file.path(args$output_dir, paste0("PairwiseCompareL_", args$exp1_name, "-", args$exp2_name))
|
||||
pairDirK <- file.path(args$output_dir, paste0("PairwiseCompareK_", args$exp1_name, "-", args$exp2_name))
|
||||
dir.create(pairDirL, showWarnings = FALSE, recursive = TRUE)
|
||||
dir.create(pairDirK, showWarnings = FALSE, recursive = TRUE)
|
||||
|
||||
# Define themes and scales
|
||||
theme_publication <- function(base_size = BASE_SIZE, base_family = "sans") {
|
||||
theme_foundation(base_size = base_size, base_family = base_family) +
|
||||
theme(
|
||||
plot.title = element_text(face = "bold", size = rel(1.2), hjust = 0.5),
|
||||
panel.background = element_rect(colour = NA),
|
||||
plot.background = element_rect(colour = NA),
|
||||
panel.border = element_rect(colour = NA),
|
||||
axis.title = element_text(face = "bold", size = rel(1)),
|
||||
axis.line = element_line(colour = "black"),
|
||||
panel.grid.major = element_line(colour = "#f0f0f0"),
|
||||
panel.grid.minor = element_blank(),
|
||||
legend.position = "bottom",
|
||||
legend.direction = "horizontal",
|
||||
legend.key.size = unit(0.2, "cm"),
|
||||
plot.margin = unit(c(10, 5, 5, 5), "mm"),
|
||||
strip.background = element_rect(colour = "#f0f0f0", fill = "#f0f0f0"),
|
||||
strip.text = element_text(face = "bold")
|
||||
)
|
||||
}
|
||||
|
||||
theme_publication_legend_right <- function(base_size = BASE_SIZE, base_family = "sans") {
|
||||
theme_publication(base_size, base_family) +
|
||||
theme(
|
||||
legend.position = "right",
|
||||
legend.direction = "vertical",
|
||||
legend.key.size = unit(0.5, "cm")
|
||||
)
|
||||
}
|
||||
|
||||
scale_fill_publication <- function(...) {
|
||||
discrete_scale("fill", "Publication", manual_pal(values = c(
|
||||
"#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
|
||||
"#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
|
||||
)), ...)
|
||||
}
|
||||
|
||||
scale_colour_publication <- function(...) {
|
||||
discrete_scale("colour", "Publication", manual_pal(values = c(
|
||||
"#386cb0", "#fdb462", "#7fc97f", "#ef3b2c", "#662506",
|
||||
"#a6cee3", "#fb9a99", "#984ea3", "#ffff33"
|
||||
)), ...)
|
||||
}
|
||||
|
||||
# Read and merge data
|
||||
load_and_merge_data <- function(file1, file2) {
|
||||
df1 <- read.csv(file = file1, stringsAsFactors = FALSE, header = TRUE)
|
||||
df2 <- read.csv(file = file2, stringsAsFactors = FALSE, header = TRUE)
|
||||
merge(df1, df2, by = "Term_Avg", all = TRUE, suffixes = c("_df1", "_df2"))
|
||||
}
|
||||
|
||||
# Generate a plot and save to PDF and HTML
|
||||
generate_plot <- function(data, x_var, y_var, color_var, title, file_path, theme_function) {
|
||||
gg <- ggplot(data = data, aes_string(
|
||||
x = x_var,
|
||||
y = y_var,
|
||||
color = color_var
|
||||
)) +
|
||||
xlab(paste("GO Term Avg lm Z for", args$exp1_name)) +
|
||||
geom_rect(aes(xmin = -2, xmax = 2, ymin = -2, ymax = 2), color = "grey20", size = 0.25, alpha = 0.1, inherit.aes = FALSE, fill = NA) +
|
||||
geom_point(shape = 3) +
|
||||
ylab(paste("GO Term Avg lm Z for", args$exp2_name)) +
|
||||
ggtitle(title) +
|
||||
theme_function()
|
||||
|
||||
pdf(file.path(file_path, ".pdf"), width = PLOT_WIDTH, height = PLOT_HEIGHT)
|
||||
print(gg)
|
||||
dev.off()
|
||||
|
||||
pgg <- ggplotly(gg)
|
||||
htmlwidgets::saveWidget(pgg, file.path(file_path, ".html"))
|
||||
}
|
||||
|
||||
# Identify and annotate specific interactions
|
||||
annotate_interactions <- function(df, exp1_name, exp2_name, suffix) {
|
||||
df$Overlap_Avg <- NA
|
||||
interactions <- list(
|
||||
"df1_Specific_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] < 2),
|
||||
"df1_Specific_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] > -2),
|
||||
"df2_Specific_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] < 2),
|
||||
"df2_Specific_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] > -2),
|
||||
"Overlap_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] >= 2),
|
||||
"Overlap_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df1")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df2")]] <= -2),
|
||||
"df2_Aggravators_df1_Alleviators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] >= 2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] <= -2),
|
||||
"df2_Alleviators_df1_Aggravators" = which(df[[paste0("Z_lm_", suffix, "_Avg_df2")]] <= -2 & df[[paste0("Z_lm_", suffix, "_Avg_df1")]] >= 2)
|
||||
)
|
||||
annotation <- list(
|
||||
df1_Specific_Aggravators = paste(exp1_name, "Specific_Deletion_Enhancers", sep = "_"),
|
||||
df1_Specific_Alleviators = paste(exp1_name, "Specific_Deletion_Suppressors", sep = "_"),
|
||||
df2_Specific_Aggravators = paste(exp2_name, "Specific_Deletion_Enhancers", sep = "_"),
|
||||
df2_Specific_Alleviators = paste(exp2_name, "Specific_Deletion_Suppressors", sep = "_"),
|
||||
Overlap_Aggravators = "Overlapping_Deletion_Enhancers",
|
||||
Overlap_Alleviators = "Overlapping_Deletion_Suppressors",
|
||||
df2_Aggravators_df1_Alleviators = paste(exp2_name, "Deletion_Enhancers", exp1_name, "Deletion_Suppressors", sep = "_"),
|
||||
df2_Alleviators_df1_Aggravators = paste(exp2_name, "Deletion_Suppressors", exp1_name, "Deletion_Enhancers", sep = "_")
|
||||
)
|
||||
for (key in names(interactions)) {
|
||||
try(df$Overlap_Avg[interactions[[key]]] <- annotation[[key]])
|
||||
}
|
||||
df
|
||||
}
|
||||
|
||||
# Rank and filter data
|
||||
rank_and_filter_data <- function(df, suffix) {
|
||||
z1 <- df
|
||||
z1[[paste0("L_Subtract_SD_", suffix, "_df1")]] <- z1[[paste0("Z_lm_", suffix, "_Avg_df1")]] - z1[[paste0("Z_lm_", suffix, "_SD_df1")]]
|
||||
z1[[paste0("L_Subtract_SD_", suffix, "_df2")]] <- z1[[paste0("Z_lm_", suffix, "_Avg_df2")]] - z1[[paste0("Z_lm_", suffix, "_SD_df2")]]
|
||||
|
||||
z2 <- df
|
||||
z2[[paste0("L_Subtract_SD_", suffix, "_df1")]] <- z2[[paste0("Z_lm_", suffix, "_Avg_df1")]] + z2[[paste0("Z_lm_", suffix, "_SD_df1")]]
|
||||
z2[[paste0("L_Subtract_SD_", suffix, "_df2")]] <- z2[[paste0("Z_lm_", suffix, "_Avg_df2")]] + z2[[paste0("Z_lm_", suffix, "_SD_df2")]]
|
||||
|
||||
df_above_threshold <- df[!is.na(df$Overlap_Avg), ]
|
||||
df_above_threshold$df1_Rank <- rank(-df_above_threshold[[paste0("Z_lm_", suffix, "_Avg_df1")]], ties.method = "random")
|
||||
df_above_threshold$df2_Rank <- rank(-df_above_threshold[[paste0("Z_lm_", suffix, "_Avg_df2")]], ties.method = "random")
|
||||
|
||||
list(z1 = z1, z2 = z2, df_above_threshold = df_above_threshold)
|
||||
}
|
||||
|
||||
# Main execution function for a pairwise comparison
|
||||
run_pairwise_comparison <- function(suffix, dir) {
|
||||
df <- load_and_merge_data(args$exp1_file, args$exp2_file)
|
||||
|
||||
# Generate initial ontology-based plot
|
||||
generate_plot(df,
|
||||
paste0("Z_lm_", suffix, "_Avg_df1"), paste0("Z_lm_", suffix, "_Avg_df2"), "Ontology_Avg_df1",
|
||||
paste("Comparing Average GO Term Z lm for", args$exp1_name, "vs.", args$exp2_name),
|
||||
file.path(dir, paste0("Scatter_lm_GTA_Averages_", args$exp1_name, "_vs_", args$exp2_name, "_All_ByOntology")),
|
||||
theme_publication_legend_right)
|
||||
|
||||
# Annotate interactions and generate overlap-based plot
|
||||
df <- annotate_interactions(df, args$exp1_name, args$exp2_name, suffix)
|
||||
ranks <- rank_and_filter_data(df, suffix)
|
||||
|
||||
generate_plot(df,
|
||||
paste0("Z_lm_", suffix, "_Avg_df1"),
|
||||
paste0("Z_lm_", suffix, "_Avg_df2"),
|
||||
"Overlap_Avg",
|
||||
paste("Comparing Average GO Term Z lm for", args$exp1_name, "vs.", args$exp2_name),
|
||||
file.path(dir, paste0("Scatter_lm_GTA_Averages_", args$exp1_name, "_vs_", args$exp2_name, "_All_ByOverlap")),
|
||||
theme_publication_legend_right)
|
||||
|
||||
generate_plot(ranks$df_above_threshold,
|
||||
paste0("Z_lm_", suffix, "_Avg_df1"),
|
||||
paste0("Z_lm_", suffix, "_Avg_df2"),
|
||||
"Overlap_Avg",
|
||||
paste("Comparing Average GO Term Z lm for", args$exp1_name, "vs.", args$exp2_name, "Above Threshold"),
|
||||
file.path(dir,
|
||||
paste0("Scatter_lm_GTA_Averages_", args$exp1_name, "_vs_", args$exp2_name, "_All_ByOverlap_AboveThreshold")),
|
||||
theme_publication_legend_right)
|
||||
|
||||
# Save CSV files
|
||||
write.csv(df, file.path(dir,
|
||||
paste0("All_GTA_Avg_Scores_", args$exp1_name, "_vs_", args$exp2_name, ".csv")),
|
||||
row.names = FALSE)
|
||||
write.csv(ranks$df_above_threshold,
|
||||
file.path(dir, paste0("AboveThreshold_GTA_Avg_Scores_", args$exp1_name, "_vs_", args$exp2_name, ".csv")),
|
||||
row.names = FALSE)
|
||||
}
|
||||
|
||||
# Execute Pairwise L and Pairwise K comparisons
|
||||
run_pairwise_comparison("L", pairDirL)
|
||||
run_pairwise_comparison("K", pairDirK)
|
||||
@@ -14,7 +14,7 @@ BASE_SIZE <- 14
|
||||
|
||||
options(warn = 2, max.print = 100)
|
||||
|
||||
# Function to parse arguments
|
||||
# Parse arguments
|
||||
parse_arguments <- function() {
|
||||
if (interactive()) {
|
||||
args <- c(
|
||||
@@ -92,7 +92,7 @@ scale_colour_publication <- function(...) {
|
||||
)), ...)
|
||||
}
|
||||
|
||||
# Function to load and preprocess data
|
||||
# Load and preprocess data
|
||||
load_and_preprocess_data <- function(input_file) {
|
||||
df <- tryCatch({
|
||||
read.delim(input_file, skip = 2, as.is = TRUE, row.names = 1, strip.white = TRUE)
|
||||
@@ -124,7 +124,7 @@ df <- df %>%
|
||||
mutate(OrfRep = if_else(ORF == "YDL227C", "YDL227C", OrfRep)) %>%
|
||||
filter(!Gene %in% c("BLANK", "Blank", "blank"), Drug != "BMH21")
|
||||
|
||||
# Function to create plot
|
||||
# Create plot
|
||||
create_plot <- function(df, var, plot_type) {
|
||||
filtered_df <- df %>% filter(is.finite(.data[[var]]))
|
||||
p <- ggplot(filtered_df, aes(Scan, .data[[var]], color = as.factor(conc_num))) +
|
||||
@@ -143,7 +143,7 @@ create_plot <- function(df, var, plot_type) {
|
||||
return(p)
|
||||
}
|
||||
|
||||
# Function to publish plot to PDF and HTML (Plotly)
|
||||
# Publish plot to PDF and HTML (Plotly)
|
||||
publish_plot <- function(plot, plot_path) {
|
||||
# if (file.exists(plot_path)) {
|
||||
# file.rename(plot_path, paste0(plot_path, BACKUP_SUFFIX))
|
||||
@@ -159,7 +159,7 @@ publish_plot <- function(plot, plot_path) {
|
||||
saveWidget(pgg, sub(".pdf$", ".html", plot_path), selfcontained = TRUE)
|
||||
}
|
||||
|
||||
# Function to publish multiple plots
|
||||
# Publish multiple plots
|
||||
publish_multiple_plots <- function(df, variables, plot_type, out_dir_qc, suffix = "") {
|
||||
lapply(variables, function(var) {
|
||||
plot <- create_plot(df, var, plot_type)
|
||||
@@ -167,7 +167,7 @@ publish_multiple_plots <- function(df, variables, plot_type, out_dir_qc, suffix
|
||||
})
|
||||
}
|
||||
|
||||
# Function to calculate and publish summary statistics
|
||||
# Calculate and publish summary statistics
|
||||
publish_summary_stats <- function(df, variables, out_dir) {
|
||||
stats_list <- lapply(variables, function(var) {
|
||||
df %>%
|
||||
@@ -183,7 +183,7 @@ publish_summary_stats <- function(df, variables, out_dir) {
|
||||
write.csv(summary_stats_df, file.path(out_dir, "summary_stats_all_strains.csv"), row.names = FALSE)
|
||||
}
|
||||
|
||||
# Function to calculate and publish interaction scores
|
||||
# Calculate and publish interaction scores
|
||||
publish_interaction_scores <- function(df, out_dir) {
|
||||
interaction_scores <- df %>%
|
||||
group_by(OrfRep) %>%
|
||||
@@ -207,7 +207,7 @@ publish_interaction_scores <- function(df, out_dir) {
|
||||
arrange(l_rank, k_rank), file.path(out_dir, "rf_zscores_interaction_ranked.csv"), row.names = FALSE)
|
||||
}
|
||||
|
||||
# Function to publish z-scores
|
||||
# Publish z-scores
|
||||
publish_zscores <- function(df, out_dir) {
|
||||
zscores <- df %>%
|
||||
mutate(
|
||||
|
||||
@@ -1,238 +1,375 @@
|
||||
#!/usr/bin/env Rscript
|
||||
# JoinInteractExps.R
|
||||
|
||||
library("plyr")
|
||||
library("sos")
|
||||
library("dplyr")
|
||||
|
||||
args <- commandArgs(TRUE)
|
||||
|
||||
# Set output dir
|
||||
if (length(args) >= 1) {
|
||||
out_dir <- file.path(args[1])
|
||||
} else {
|
||||
out_dir <- "./" # for legacy workflow
|
||||
}
|
||||
|
||||
# Set sd value
|
||||
if (length(args) >= 2) {
|
||||
sd <- as.numeric(args[2])
|
||||
} else {
|
||||
sd <- 2 # default value
|
||||
}
|
||||
|
||||
sprintf("SD value is: %d", sd)
|
||||
|
||||
# Set study_info file
|
||||
if (length(args) >= 3) {
|
||||
study_info <- file.path(args[3])
|
||||
} else {
|
||||
study_info <- "../Code/StudyInfo.csv" # for legacy workflow
|
||||
}
|
||||
|
||||
studies <- args[4:length(args)]
|
||||
print(studies)
|
||||
input_files <- c()
|
||||
for (i in seq_along(studies)) {
|
||||
study <- studies[i]
|
||||
zs_file <- file.path(study, "zscores", "zscores_interaction.csv")
|
||||
if (file.exists(zs_file)) {
|
||||
input_files[i] <- zs_file
|
||||
# Function to parse and set arguments
|
||||
parse_arguments <- function() {
|
||||
if (interactive()) {
|
||||
args <- c(
|
||||
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD",
|
||||
2,
|
||||
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/StudyInfo.csv",
|
||||
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/Exp1",
|
||||
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/Exp2"
|
||||
)
|
||||
} else {
|
||||
args <- commandArgs(trailingOnly = TRUE)
|
||||
}
|
||||
}
|
||||
rm(zs_file, study)
|
||||
|
||||
# for (var in ls()) {
|
||||
# print(paste(var, ":", get(var)))
|
||||
list(
|
||||
out_dir = normalizePath(file.path(args[1]), mustWork = FALSE),
|
||||
sd = as.numeric(args[2]),
|
||||
study_info = normalizePath(file.path(args[3]), mustWork = FALSE),
|
||||
input_dirs = args[4:length(args)]
|
||||
)
|
||||
}
|
||||
|
||||
args <- parse_arguments()
|
||||
|
||||
# Create an array for the zscores files
|
||||
get_zscores_files <- function(dirs) {
|
||||
files <- sapply(dirs, function(study) {
|
||||
file_path <- file.path(study, "zscores", "zscores_interaction.csv")
|
||||
if (file.exists(file_path)) file_path else NULL
|
||||
}, simplify = TRUE, USE.NAMES = FALSE)
|
||||
|
||||
# Filter out NULL entries
|
||||
files[!sapply(files, is.null)]
|
||||
}
|
||||
|
||||
zscores_files <- get_zscores_files(args$input_dirs)
|
||||
sprintf("The SD value is: %d", args$sd)
|
||||
|
||||
# Ensure there are enough files to compare
|
||||
if (length(zscores_files) < 2) stop("Not enough experiments to compare, exiting script")
|
||||
|
||||
# Function to join zscores files
|
||||
join_zscores_files <- function(files) {
|
||||
joined_data <- read.csv(file = files[1], stringsAsFactors = FALSE)
|
||||
for (file in files[-1]) {
|
||||
temp_data <- read.csv(file = file, stringsAsFactors = FALSE)
|
||||
joined_data <- join(joined_data, temp_data, by = "OrfRep")
|
||||
}
|
||||
joined_data
|
||||
}
|
||||
|
||||
# Load and join zscores files
|
||||
joined_data <- join_zscores_files(zscores_files)
|
||||
|
||||
# Order and select columns
|
||||
order_and_select_columns <- function(data) {
|
||||
ordered_data <- data[, order(colnames(data))]
|
||||
selected_headers <- select(ordered_data,
|
||||
contains("OrfRep"), matches("Gene"),
|
||||
contains("z_lm_k"), contains("z_shift_k"),
|
||||
contains("z_lm_l"), contains("z_shift_l"))
|
||||
selected_headers
|
||||
}
|
||||
|
||||
selected_headers <- order_and_select_columns(joined_data)
|
||||
|
||||
# Remove redundant columns like "Gene.1"
|
||||
clean_headers <- function(data, suffixes) {
|
||||
suffixes_to_remove <- paste0("Gene.", seq_len(suffixes))
|
||||
select(data, -all_of(suffixes_to_remove))
|
||||
}
|
||||
|
||||
headSel <- clean_headers(selected_headers, length(zscores_files) - 1)
|
||||
headSel2 <- clean_headers(select(joined_data, contains("OrfRep"), matches("Gene")), length(zscores_files) - 1)
|
||||
|
||||
# Fill NA values in Shift and Z_lm columns
|
||||
fill_na_in_columns <- function(data) {
|
||||
for (header in colnames(data)) {
|
||||
if (grepl("Shift", header)) {
|
||||
data[[header]][is.na(data[[header]])] <- 0.001
|
||||
} else if (grepl("Z_lm_", header)) {
|
||||
data[[header]][is.na(data[[header]])] <- 0.0001
|
||||
}
|
||||
}
|
||||
data
|
||||
}
|
||||
|
||||
headSel <- fill_na_in_columns(headSel)
|
||||
|
||||
# Filter based on standard deviation
|
||||
filter_by_sd <- function(data, sd) {
|
||||
if (sd == 0) return(data)
|
||||
|
||||
z_lm_cols <- select(data, contains("z_lm_"))
|
||||
filter_vector <- rowSums(abs(z_lm_cols) >= sd) > 0
|
||||
data[filter_vector, ]
|
||||
}
|
||||
|
||||
REMcRdy <- filter_by_sd(select(headSel, contains("OrfRep"), matches("Gene"), contains("z_lm_")), args$sd)
|
||||
shiftOnly <- filter_by_sd(select(headSel, contains("OrfRep"), matches("Gene"), contains("z_shift")), args$sd)
|
||||
|
||||
# Reorder columns to interleave Z_lm and Shift data
|
||||
reorder_columns <- function(data1, data2) {
|
||||
combined_data <- data1
|
||||
for (i in 3:ncol(data1)) {
|
||||
combined_data <- cbind(combined_data, data2[i], data1[i])
|
||||
}
|
||||
combined_data
|
||||
}
|
||||
|
||||
combI <- reorder_columns(headSel2, shiftOnly)
|
||||
|
||||
# Write output files
|
||||
write.csv(REMcRdy, file.path(args$out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
|
||||
write.csv(shiftOnly, file.path(args$out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
|
||||
|
||||
# Relabel headers using experiment names from StudyInfo.csv
|
||||
relabel_headers <- function(headers, labels) {
|
||||
new_labels <- headers
|
||||
for (i in seq_along(headers)) {
|
||||
suffix <- sub("^.*\\.(\\d+)$", "\\1", headers[i])
|
||||
if (suffix %in% 1:3) {
|
||||
exp_name <- labels[as.numeric(suffix), 2]
|
||||
new_labels[i] <- gsub(paste0(".", suffix), paste0("_", exp_name), headers[i])
|
||||
}
|
||||
}
|
||||
new_labels
|
||||
}
|
||||
|
||||
LabelStd <- read.csv(file = args$study_info, stringsAsFactors = FALSE)
|
||||
colnames(shiftOnly) <- relabel_headers(colnames(shiftOnly), LabelStd)
|
||||
colnames(REMcRdy) <- relabel_headers(colnames(REMcRdy), LabelStd)
|
||||
|
||||
# Save relabeled files
|
||||
write.csv(REMcRdy, file.path(args$out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
|
||||
write.csv(shiftOnly, file.path(args$out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
|
||||
|
||||
# Save updated parameters
|
||||
LabelStd[, 4] <- args$sd
|
||||
write.csv(LabelStd, file.path(args$out_dir, "parameters.csv"), row.names = FALSE)
|
||||
write.csv(LabelStd, file = args$study_info, row.names = FALSE)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# library("plyr")
|
||||
# library("sos")
|
||||
# library("dplyr")
|
||||
|
||||
# # Function to parse and set arguments
|
||||
# parse_arguments <- function() {
|
||||
# if (interactive()) {
|
||||
# args <- c(
|
||||
# "./", # Default out_dir
|
||||
# "2", # Default SD value
|
||||
# "../Code/StudyInfo.csv" # Default study_info path
|
||||
# )
|
||||
# } else {
|
||||
# args <- commandArgs(trailingOnly = TRUE)
|
||||
# }
|
||||
|
||||
# print(input_files)
|
||||
# print(length(input_files))
|
||||
# list(
|
||||
# out_dir = normalizePath(file.path(args[1]), mustWork = FALSE),
|
||||
# sd = as.numeric(args[2]),
|
||||
# study_info = normalizePath(file.path(args[3]), mustWork = FALSE),
|
||||
# input_dirs = args[4:length(args)]
|
||||
# )
|
||||
# }
|
||||
|
||||
# TODO this is better handled in a loop in case you want to compare more experiments?
|
||||
# The input is already designed for this
|
||||
# Read in the files for your experiment and
|
||||
# Join the two files at a time as a function of how many inputFile
|
||||
# list the larger file first ? in this example X2 has the larger number of genes
|
||||
# If X1 has a larger number of genes, switch the order of X1 and X2
|
||||
if (length(input_files) < 2) {
|
||||
print("Note enough Exps to compare, skipping join")
|
||||
stop("Exiting script")
|
||||
}
|
||||
# args <- parse_arguments()
|
||||
|
||||
if (length(input_files) >= 2) {
|
||||
X1 <- read.csv(file = input_files[1], stringsAsFactors = FALSE)
|
||||
X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE)
|
||||
X <- join(X1, X2, by = "OrfRep")
|
||||
OBH <- X[, order(colnames(X))] # OrderByHeader
|
||||
headers <- select(OBH, contains("OrfRep"), matches("Gene"),
|
||||
contains("z_lm_k"), contains("z_shift_k"), contains("z_lm_l"), contains("z_shift_l"))
|
||||
headSel <- select(headers, -"Gene.1") # remove "Gene.1 column
|
||||
headSel2 <- select(OBH, contains("OrfRep"), matches("Gene")) # frame for interleaving Z_lm with Shift colums
|
||||
headSel2 <- select(headSel2, -"Gene.1") # remove "Gene.1 column # frame for interleaving Z_lm with Shift colums
|
||||
}
|
||||
# # Create an array for the zscores files
|
||||
# zscores_files <- sapply(args$input_dirs, function(study) {
|
||||
# file_path <- file.path(study, "zscores", "zscores_interaction.csv")
|
||||
# if (file.exists(file_path)) file_path else NULL
|
||||
# }, simplify = TRUE, USE.NAMES = FALSE)
|
||||
|
||||
if (length(input_files) >= 3) {
|
||||
X3 <- read.csv(file = input_files[3], stringsAsFactors = FALSE)
|
||||
X <- join(X, X3, by = "OrfRep")
|
||||
headSel <- select(headers, -"Gene.1", -"Gene.2")
|
||||
headSel2 <- select(OBH, contains("OrfRep"), matches("Gene"))
|
||||
headSel2 <- select(headSel2, -"Gene.1", -"Gene.2")
|
||||
}
|
||||
# # Filter out NULL entries
|
||||
# zscores_files <- zscores_files[!sapply(zscores_files, is.null)]
|
||||
|
||||
if (length(input_files) >= 4) {
|
||||
X4 <- read.csv(file = input_files[4], stringsAsFactors = FALSE)
|
||||
X <- join(X, X4, by = "OrfRep")
|
||||
headSel <- select(headers, -"Gene.1", -"Gene.2", -"Gene.3")
|
||||
headSel2 <- select(OBH, contains("OrfRep"), matches("Gene"))
|
||||
headSel2 <- select(headSel2, -"Gene.1", -"Gene.2", -"Gene.3")
|
||||
}
|
||||
# sprintf("The SD value is: %d", args$sd)
|
||||
# # print(args$input_dirs)
|
||||
|
||||
# print(headers)
|
||||
# headSel$contains("Z_Shift") %>% replace_na(0.001)
|
||||
headers <- colnames(headSel)
|
||||
# print(headers)
|
||||
i <- 0
|
||||
for (i in 1:length(headers)) {
|
||||
if (grepl("Shift", headers[i])) {
|
||||
headSel[headers[i]][is.na(headSel[headers[i]])] <- 0.001
|
||||
}
|
||||
if (grepl("Z_lm_", headers[i])) {
|
||||
headSel[headers[i]][is.na(headSel[headers[i]])] <- 0.0001
|
||||
}
|
||||
}
|
||||
# # TODO this is better handled in a loop in case you want to compare more experiments?
|
||||
# # The input is already designed for this
|
||||
# # Read in the files for your experiment and
|
||||
# # Join the two files at a time as a function of how many inputFile
|
||||
# # list the larger file first ? in this example X2 has the larger number of genes
|
||||
# # If X1 has a larger number of genes, switch the order of X1 and X2
|
||||
# if (length(zscores_files) < 2) {
|
||||
# print("Note enough Exps to compare, skipping join")
|
||||
# stop("Exiting script")
|
||||
# }
|
||||
|
||||
# 2SD option code to exclude Z_lm values less than 2 standard Deviations
|
||||
REMcRdy <- select(headSel, contains("OrfRep"), matches("Gene"), contains("z_lm_"))
|
||||
shiftOnly <- select(headSel, contains("OrfRep"), matches("Gene"), contains("z_shift"))
|
||||
# if (length(zscores_files) >= 2) {
|
||||
# X1 <- read.csv(file = zscores_files[1], stringsAsFactors = FALSE)
|
||||
# X2 <- read.csv(file = zscores_files[2], stringsAsFactors = FALSE)
|
||||
# X <- join(X1, X2, by = "OrfRep")
|
||||
# OBH <- X[, order(colnames(X))] # OrderByHeader
|
||||
# headers <- select(OBH, contains("OrfRep"), matches("Gene"),
|
||||
# contains("z_lm_k"), contains("z_shift_k"), contains("z_lm_l"), contains("z_shift_l"))
|
||||
# headSel <- select(headers, -"Gene.1") # remove "Gene.1 column
|
||||
# headSel2 <- select(OBH, contains("OrfRep"), matches("Gene")) # frame for interleaving Z_lm with Shift colums
|
||||
# headSel2 <- select(headSel2, -"Gene.1") # remove "Gene.1 column # frame for interleaving Z_lm with Shift colums
|
||||
# }
|
||||
|
||||
# Code to replace the numeric (.1 .2 .3) headers with experiment names from StudyInfo.txt
|
||||
Labels <- read.csv(file = study_info, stringsAsFactors = FALSE, sep = ",")
|
||||
# if (length(zscores_files) >= 3) {
|
||||
# X3 <- read.csv(file = zscores_files[3], stringsAsFactors = FALSE)
|
||||
# X <- join(X, X3, by = "OrfRep")
|
||||
# headSel <- select(headers, -"Gene.1", -"Gene.2")
|
||||
# headSel2 <- select(OBH, contains("OrfRep"), matches("Gene"))
|
||||
# headSel2 <- select(headSel2, -"Gene.1", -"Gene.2")
|
||||
# }
|
||||
|
||||
# Using Text search grepl to relabel headers
|
||||
REMcRdyHdr <- colnames(REMcRdy)
|
||||
REMcRdyLabels <- "asdf"
|
||||
shftHdr <- colnames(shiftOnly)
|
||||
shiftLabels <- "asdf"
|
||||
shiftLabels[1:2] <- shftHdr[1:2]
|
||||
REMcRdyLabels[1:2] <- REMcRdyHdr[1:2]
|
||||
# if (length(zscores_files) >= 4) {
|
||||
# X4 <- read.csv(file = zscores_files[4], stringsAsFactors = FALSE)
|
||||
# X <- join(X, X4, by = "OrfRep")
|
||||
# headSel <- select(headers, -"Gene.1", -"Gene.2", -"Gene.3")
|
||||
# headSel2 <- select(OBH, contains("OrfRep"), matches("Gene"))
|
||||
# headSel2 <- select(headSel2, -"Gene.1", -"Gene.2", -"Gene.3")
|
||||
# }
|
||||
|
||||
for (i in 3:(length(shftHdr))) {
|
||||
if (i == 3) {
|
||||
shiftLabels[3] <- paste0(Labels[1, 2], ".", shftHdr[3])
|
||||
REMcRdyLabels[3] <- paste0(Labels[1, 2], ".", REMcRdyHdr[3])
|
||||
}
|
||||
if (i == 5) {
|
||||
shiftLabels[5] <- paste0(Labels[1, 2], ".", shftHdr[5])
|
||||
REMcRdyLabels[5] <- paste0(Labels[1, 2], ".", REMcRdyHdr[5])
|
||||
}
|
||||
if (i == 7) {
|
||||
shiftLabels[7] <- paste0(Labels[1, 2], ".", shftHdr[7])
|
||||
REMcRdyLabels[7] <- paste0(Labels[1, 2], ".", REMcRdyHdr[7])
|
||||
}
|
||||
if (grepl(".1", shftHdr[i], fixed = true)) {
|
||||
shiftLabels[i] <- paste0(Labels[2, 2], ".", shftHdr[i])
|
||||
REMcRdyLabels[i] <- paste0(Labels[2, 2], ".", REMcRdyHdr[i])
|
||||
}
|
||||
if (grepl(".2", shftHdr[i], fixed = true)) {
|
||||
shiftLabels[i] < -paste0(Labels[3, 2], ".", shftHdr[i])
|
||||
REMcRdyLabels[i] <- paste0(Labels[3, 2], ".", REMcRdyHdr[i])
|
||||
}
|
||||
if (grepl(".3", shftHdr[i], fixed = true)) {
|
||||
shiftLabels[i] <- paste0(Labels[4, 2], ".", shftHdr[i])
|
||||
REMcRdyLabels[i] <- paste0(Labels[4, 2], ".", REMcRdyHdr[i])
|
||||
}
|
||||
}
|
||||
# # print(headers)
|
||||
# # headSel$contains("Z_Shift") %>% replace_na(0.001)
|
||||
# headers <- colnames(headSel)
|
||||
# # print(headers)
|
||||
# i <- 0
|
||||
# for (i in 1:length(headers)) {
|
||||
# if (grepl("Shift", headers[i])) {
|
||||
# headSel[headers[i]][is.na(headSel[headers[i]])] <- 0.001
|
||||
# }
|
||||
# if (grepl("Z_lm_", headers[i])) {
|
||||
# headSel[headers[i]][is.na(headSel[headers[i]])] <- 0.0001
|
||||
# }
|
||||
# }
|
||||
|
||||
for (i in 3:(length(REMcRdyLabels))) {
|
||||
j <- as.integer(i)
|
||||
REMcRdyLabels[j] <- gsub("[.]", "_", REMcRdyLabels[j])
|
||||
shiftLabels[j] <- gsub("[.]", "_", shiftLabels[j])
|
||||
}
|
||||
# # 2SD option code to exclude Z_lm values less than 2 standard Deviations
|
||||
# REMcRdy <- select(headSel, contains("OrfRep"), matches("Gene"), contains("z_lm_"))
|
||||
# shiftOnly <- select(headSel, contains("OrfRep"), matches("Gene"), contains("z_shift"))
|
||||
|
||||
colnames(shiftOnly) <- shiftLabels
|
||||
colnames(REMcRdy) <- REMcRdyLabels
|
||||
# # Code to replace the numeric (.1 .2 .3) headers with experiment names from StudyInfo.txt
|
||||
# Labels <- read.csv(file = study_info, stringsAsFactors = FALSE, sep = ",")
|
||||
|
||||
combI <- headSel2 # starting Template orf, Genename columns
|
||||
# # Using Text search grepl to relabel headers
|
||||
# REMcRdyHdr <- colnames(REMcRdy)
|
||||
# REMcRdyLabels <- "asdf"
|
||||
# shftHdr <- colnames(shiftOnly)
|
||||
# shiftLabels <- "asdf"
|
||||
# shiftLabels[1:2] <- shftHdr[1:2]
|
||||
# REMcRdyLabels[1:2] <- REMcRdyHdr[1:2]
|
||||
|
||||
# headersRemc<-colnames(REMcRdy)
|
||||
# Reorder columns to produce an interleaved set of Z_lm and Shift data for all the cpps.
|
||||
for (i in 3:length(colnames(REMcRdy))) {
|
||||
combI <- cbind.data.frame(combI, shiftOnly[i])
|
||||
combI <- cbind.data.frame(combI, REMcRdy[i])
|
||||
}
|
||||
# for (i in 3:(length(shftHdr))) {
|
||||
# if (i == 3) {
|
||||
# shiftLabels[3] <- paste0(Labels[1, 2], ".", shftHdr[3])
|
||||
# REMcRdyLabels[3] <- paste0(Labels[1, 2], ".", REMcRdyHdr[3])
|
||||
# }
|
||||
# if (i == 5) {
|
||||
# shiftLabels[5] <- paste0(Labels[1, 2], ".", shftHdr[5])
|
||||
# REMcRdyLabels[5] <- paste0(Labels[1, 2], ".", REMcRdyHdr[5])
|
||||
# }
|
||||
# if (i == 7) {
|
||||
# shiftLabels[7] <- paste0(Labels[1, 2], ".", shftHdr[7])
|
||||
# REMcRdyLabels[7] <- paste0(Labels[1, 2], ".", REMcRdyHdr[7])
|
||||
# }
|
||||
# if (grepl(".1", shftHdr[i], fixed = true)) {
|
||||
# shiftLabels[i] <- paste0(Labels[2, 2], ".", shftHdr[i])
|
||||
# REMcRdyLabels[i] <- paste0(Labels[2, 2], ".", REMcRdyHdr[i])
|
||||
# }
|
||||
# if (grepl(".2", shftHdr[i], fixed = true)) {
|
||||
# shiftLabels[i] < -paste0(Labels[3, 2], ".", shftHdr[i])
|
||||
# REMcRdyLabels[i] <- paste0(Labels[3, 2], ".", REMcRdyHdr[i])
|
||||
# }
|
||||
# if (grepl(".3", shftHdr[i], fixed = true)) {
|
||||
# shiftLabels[i] <- paste0(Labels[4, 2], ".", shftHdr[i])
|
||||
# REMcRdyLabels[i] <- paste0(Labels[4, 2], ".", REMcRdyHdr[i])
|
||||
# }
|
||||
# }
|
||||
|
||||
Vec1 <- NA
|
||||
Vec2 <- NA
|
||||
Vec3 <- NA
|
||||
Vec4 <- NA
|
||||
Vec5 <- NA
|
||||
Vec6 <- NA
|
||||
Vec7 <- NA
|
||||
Vec8 <- NA
|
||||
# for (i in 3:(length(REMcRdyLabels))) {
|
||||
# j <- as.integer(i)
|
||||
# REMcRdyLabels[j] <- gsub("[.]", "_", REMcRdyLabels[j])
|
||||
# shiftLabels[j] <- gsub("[.]", "_", shiftLabels[j])
|
||||
# }
|
||||
|
||||
if (length(REMcRdy) == 6) {
|
||||
Vec1 <- abs(REMcRdy[, 3]) >= sd
|
||||
Vec2 <- abs(REMcRdy[, 4]) >= sd
|
||||
Vec3 <- abs(REMcRdy[, 5]) >= sd
|
||||
Vec4 <- abs(REMcRdy[, 6]) >= sd
|
||||
bolVec <- Vec1 | Vec2 | Vec3 | Vec4
|
||||
REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
|
||||
REMcRdyGT2[, 3:6] <- REMcRdy[bolVec, 3:6]
|
||||
shiftOnlyGT2 <- shiftOnly[bolVec, 1:2]
|
||||
shiftOnlyGT2[, 3:6] <- shiftOnly[bolVec, 3:6]
|
||||
}
|
||||
# colnames(shiftOnly) <- shiftLabels
|
||||
# colnames(REMcRdy) <- REMcRdyLabels
|
||||
|
||||
if (length(REMcRdy) == 8) {
|
||||
Vec1 <- abs(REMcRdy[, 3]) >= sd
|
||||
Vec2 <- abs(REMcRdy[, 4]) >= sd
|
||||
Vec3 <- abs(REMcRdy[, 5]) >= sd
|
||||
Vec4 <- abs(REMcRdy[, 6]) >= sd
|
||||
Vec5 <- abs(REMcRdy[, 7]) >= sd
|
||||
Vec6 <- abs(REMcRdy[, 8]) >= sd
|
||||
bolVec <- Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6
|
||||
REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
|
||||
REMcRdyGT2[, 3:8] <- REMcRdy[bolVec, 3:8]
|
||||
shiftOnlyGT2 <- shiftOnly[bolVec, 1:2]
|
||||
shiftOnlyGT2[, 3:8] <- shiftOnly[bolVec, 3:8]
|
||||
}
|
||||
# combI <- headSel2 # starting Template orf, Genename columns
|
||||
|
||||
if (length(REMcRdy) == 10) {
|
||||
Vec1 <- abs(REMcRdy[, 3]) >= sd
|
||||
Vec2 <- abs(REMcRdy[, 4]) >= sd
|
||||
Vec3 <- abs(REMcRdy[, 5]) >= sd
|
||||
Vec4 <- abs(REMcRdy[, 6]) >= sd
|
||||
Vec5 <- abs(REMcRdy[, 7]) >= sd
|
||||
Vec6 <- abs(REMcRdy[, 8]) >= sd
|
||||
Vec7 <- abs(REMcRdy[, 9]) >= sd
|
||||
Vec8 <- abs(REMcRdy[, 10]) >= sd
|
||||
bolVec <- Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6 | Vec7 | Vec8
|
||||
REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
|
||||
REMcRdyGT2[, 3:10] <- REMcRdy[bolVec, 3:10]
|
||||
shiftOnlyGT2 <- shiftOnly[bolVec, 1:2]
|
||||
shiftOnlyGT2[, 3:10] <- shiftOnly[bolVec, 3:10]
|
||||
}
|
||||
# # headersRemc<-colnames(REMcRdy)
|
||||
# # Reorder columns to produce an interleaved set of Z_lm and Shift data for all the cpps.
|
||||
# for (i in 3:length(colnames(REMcRdy))) {
|
||||
# combI <- cbind.data.frame(combI, shiftOnly[i])
|
||||
# combI <- cbind.data.frame(combI, REMcRdy[i])
|
||||
# }
|
||||
|
||||
if (sd != 0) {
|
||||
REMcRdy <- REMcRdyGT2 # [,2:length(REMcRdyGT2)]
|
||||
shiftOnly <- shiftOnlyGT2 # [,2:length(shiftOnlyGT2)]
|
||||
}
|
||||
# Vec1 <- NA
|
||||
# Vec2 <- NA
|
||||
# Vec3 <- NA
|
||||
# Vec4 <- NA
|
||||
# Vec5 <- NA
|
||||
# Vec6 <- NA
|
||||
# Vec7 <- NA
|
||||
# Vec8 <- NA
|
||||
|
||||
if (sd == 0) {
|
||||
REMcRdy <- REMcRdy # [,2:length(REMcRdy)]
|
||||
shiftOnly <- shiftOnly # [,2:length(shiftOnly)]
|
||||
}
|
||||
# if (length(REMcRdy) == 6) {
|
||||
# Vec1 <- abs(REMcRdy[, 3]) >= sd
|
||||
# Vec2 <- abs(REMcRdy[, 4]) >= sd
|
||||
# Vec3 <- abs(REMcRdy[, 5]) >= sd
|
||||
# Vec4 <- abs(REMcRdy[, 6]) >= sd
|
||||
# bolVec <- Vec1 | Vec2 | Vec3 | Vec4
|
||||
# REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
|
||||
# REMcRdyGT2[, 3:6] <- REMcRdy[bolVec, 3:6]
|
||||
# shiftOnlyGT2 <- shiftOnly[bolVec, 1:2]
|
||||
# shiftOnlyGT2[, 3:6] <- shiftOnly[bolVec, 3:6]
|
||||
# }
|
||||
|
||||
# R places hidden "" around the header names. The following
|
||||
# is intended to remove those quote so that the "" do not blow up the Java REMc.
|
||||
# Use ,quote=F in the write.csv statement to fix R output file.
|
||||
# write.csv(combI,file.path(out_dir,"CombinedKLzscores.csv"), row.names = FALSE)
|
||||
write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
|
||||
write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
|
||||
# LabelStd <- read.table(file="./parameters.csv",stringsAsFactors = FALSE,sep = ",")
|
||||
# if (length(REMcRdy) == 8) {
|
||||
# Vec1 <- abs(REMcRdy[, 3]) >= sd
|
||||
# Vec2 <- abs(REMcRdy[, 4]) >= sd
|
||||
# Vec3 <- abs(REMcRdy[, 5]) >= sd
|
||||
# Vec4 <- abs(REMcRdy[, 6]) >= sd
|
||||
# Vec5 <- abs(REMcRdy[, 7]) >= sd
|
||||
# Vec6 <- abs(REMcRdy[, 8]) >= sd
|
||||
# bolVec <- Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6
|
||||
# REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
|
||||
# REMcRdyGT2[, 3:8] <- REMcRdy[bolVec, 3:8]
|
||||
# shiftOnlyGT2 <- shiftOnly[bolVec, 1:2]
|
||||
# shiftOnlyGT2[, 3:8] <- shiftOnly[bolVec, 3:8]
|
||||
# }
|
||||
|
||||
LabelStd <- read.csv(file = study_info, stringsAsFactors = FALSE)
|
||||
# print(sd)
|
||||
LabelStd[, 4] <- as.numeric(sd)
|
||||
write.csv(LabelStd, file = file.path(out_dir, "parameters.csv"), row.names = FALSE)
|
||||
write.csv(LabelStd, file = study_info, row.names = FALSE)
|
||||
# if (length(REMcRdy) == 10) {
|
||||
# Vec1 <- abs(REMcRdy[, 3]) >= sd
|
||||
# Vec2 <- abs(REMcRdy[, 4]) >= sd
|
||||
# Vec3 <- abs(REMcRdy[, 5]) >= sd
|
||||
# Vec4 <- abs(REMcRdy[, 6]) >= sd
|
||||
# Vec5 <- abs(REMcRdy[, 7]) >= sd
|
||||
# Vec6 <- abs(REMcRdy[, 8]) >= sd
|
||||
# Vec7 <- abs(REMcRdy[, 9]) >= sd
|
||||
# Vec8 <- abs(REMcRdy[, 10]) >= sd
|
||||
# bolVec <- Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6 | Vec7 | Vec8
|
||||
# REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
|
||||
# REMcRdyGT2[, 3:10] <- REMcRdy[bolVec, 3:10]
|
||||
# shiftOnlyGT2 <- shiftOnly[bolVec, 1:2]
|
||||
# shiftOnlyGT2[, 3:10] <- shiftOnly[bolVec, 3:10]
|
||||
# }
|
||||
|
||||
# if (sd != 0) {
|
||||
# REMcRdy <- REMcRdyGT2 # [,2:length(REMcRdyGT2)]
|
||||
# shiftOnly <- shiftOnlyGT2 # [,2:length(shiftOnlyGT2)]
|
||||
# }
|
||||
|
||||
# if (sd == 0) {
|
||||
# REMcRdy <- REMcRdy # [,2:length(REMcRdy)]
|
||||
# shiftOnly <- shiftOnly # [,2:length(shiftOnly)]
|
||||
# }
|
||||
|
||||
# # R places hidden "" around the header names. The following
|
||||
# # is intended to remove those quote so that the "" do not blow up the Java REMc.
|
||||
# # Use ,quote=F in the write.csv statement to fix R output file.
|
||||
# # write.csv(combI,file.path(out_dir,"CombinedKLzscores.csv"), row.names = FALSE)
|
||||
# write.csv(REMcRdy, file.path(out_dir, "REMcRdy_lm_only.csv"), row.names = FALSE, quote = FALSE)
|
||||
# write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, quote = FALSE)
|
||||
# # LabelStd <- read.table(file="./parameters.csv",stringsAsFactors = FALSE,sep = ",")
|
||||
|
||||
# LabelStd <- read.csv(file = study_info, stringsAsFactors = FALSE)
|
||||
# # print(sd)
|
||||
# LabelStd[, 4] <- as.numeric(sd)
|
||||
# write.csv(LabelStd, file = file.path(out_dir, "parameters.csv"), row.names = FALSE)
|
||||
# write.csv(LabelStd, file = study_info, row.names = FALSE)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user