Rollup before removing NAs from joinInteractExps.R
This commit is contained in:
@@ -27,7 +27,7 @@ sgd_gene_list <- file.path(args[4])
|
|||||||
input_file <- file.path(args[5])
|
input_file <- file.path(args[5])
|
||||||
out_dir <- file.path(args[6])
|
out_dir <- file.path(args[6])
|
||||||
|
|
||||||
sprintf("The Standard Deviation value is: %f", delta_bg_factor)
|
sprintf("The Standard Deviation value is: %d", delta_bg_factor)
|
||||||
|
|
||||||
out_dir_qc <- file.path(out_dir, "qc")
|
out_dir_qc <- file.path(out_dir, "qc")
|
||||||
|
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ if (length(args) >= 2) {
|
|||||||
sd <- 2 # default value
|
sd <- 2 # default value
|
||||||
}
|
}
|
||||||
|
|
||||||
sprintf("SD value is: %f", sd)
|
sprintf("SD value is: %d", sd)
|
||||||
|
|
||||||
# Set study_info file
|
# Set study_info file
|
||||||
if (length(args) >= 3) {
|
if (length(args) >= 3) {
|
||||||
@@ -30,15 +30,23 @@ if (length(args) >= 3) {
|
|||||||
study_info <- "../Code/StudyInfo.csv" # for legacy workflow
|
study_info <- "../Code/StudyInfo.csv" # for legacy workflow
|
||||||
}
|
}
|
||||||
|
|
||||||
studies <- args[3:length(args)]
|
studies <- args[4:length(args)]
|
||||||
|
print(studies)
|
||||||
input_files <- c()
|
input_files <- c()
|
||||||
for (study in 1:length(studies)) {
|
for (i in seq_along(studies)) {
|
||||||
|
study <- studies[i]
|
||||||
zs_file <- file.path(study, "zscores", "zscores_interaction.csv")
|
zs_file <- file.path(study, "zscores", "zscores_interaction.csv")
|
||||||
if (file.exists(zs_file)) {
|
if (file.exists(zs_file)) {
|
||||||
input_files[study] <- zs_file
|
input_files[i] <- zs_file
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
rm(zs_file, study)
|
||||||
|
|
||||||
|
for (var in ls()) {
|
||||||
|
print(paste(var, ":", get(var)))
|
||||||
|
}
|
||||||
|
|
||||||
|
print(input_files)
|
||||||
print(length(input_files))
|
print(length(input_files))
|
||||||
|
|
||||||
# TODO this is better handled in a loop in case you want to compare more experiments?
|
# TODO this is better handled in a loop in case you want to compare more experiments?
|
||||||
@@ -47,7 +55,12 @@ print(length(input_files))
|
|||||||
# Join the files two at a time, depending on how many input files there are
|
# Join the files two at a time, depending on how many input files there are
|
||||||
# list the larger file first ? in this example X2 has the larger number of genes
|
# list the larger file first ? in this example X2 has the larger number of genes
|
||||||
# If X1 has a larger number of genes, switch the order of X1 and X2
|
# If X1 has a larger number of genes, switch the order of X1 and X2
|
||||||
if (length(input_files) == 2) {
|
if (length(input_files) == 1) {
|
||||||
|
print("Only one experiment to compare, skipping join")
|
||||||
|
stop("Exiting script")
|
||||||
|
}
|
||||||
|
|
||||||
|
if (length(input_files) >= 2) {
|
||||||
X1 <- read.csv(file = input_files[1], stringsAsFactors = FALSE)
|
X1 <- read.csv(file = input_files[1], stringsAsFactors = FALSE)
|
||||||
X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE)
|
X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE)
|
||||||
X <- join(X1, X2, by = "OrfRep")
|
X <- join(X1, X2, by = "OrfRep")
|
||||||
@@ -57,11 +70,10 @@ if (length(input_files) == 2) {
|
|||||||
headSel <- select(headSel, -"Gene.1") # remove "Gene.1 column
|
headSel <- select(headSel, -"Gene.1") # remove "Gene.1 column
|
||||||
headSel2 <- select(OBH, contains("OrfRep"), matches("Gene")) #Frame for interleaving Z_lm with Shift colums
|
headSel2 <- select(OBH, contains("OrfRep"), matches("Gene")) #Frame for interleaving Z_lm with Shift colums
|
||||||
headSel2 <- select(headSel2, -"Gene.1") # remove "Gene.1 column #Frame for interleaving Z_lm with Shift colums
|
headSel2 <- select(headSel2, -"Gene.1") # remove "Gene.1 column #Frame for interleaving Z_lm with Shift colums
|
||||||
} else if (length(input_files) == 3) {
|
}
|
||||||
X1 <- read.csv(file = input_files[1], stringsAsFactors = FALSE) # exp1File,stringsAsFactors = FALSE)
|
|
||||||
X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE) # exp2File,stringsAsFactors = FALSE)
|
if (length(input_files) >= 3) {
|
||||||
X3 <- read.csv(file = input_files[3], stringsAsFactors = FALSE) # exp3File,stringsAsFactors = FALSE)
|
X3 <- read.csv(file = input_files[3], stringsAsFactors = FALSE)
|
||||||
X <- join(X1, X2, by = "OrfRep")
|
|
||||||
X <- join(X, X3, by = "OrfRep")
|
X <- join(X, X3, by = "OrfRep")
|
||||||
OBH <- X[, order(colnames(X))] # OrderByHeader
|
OBH <- X[, order(colnames(X))] # OrderByHeader
|
||||||
headSel <- select(OBH, contains("OrfRep"), matches("Gene"),
|
headSel <- select(OBH, contains("OrfRep"), matches("Gene"),
|
||||||
@@ -69,14 +81,10 @@ if (length(input_files) == 2) {
|
|||||||
headSel <- select(headSel, -"Gene.1", -"Gene.2")
|
headSel <- select(headSel, -"Gene.1", -"Gene.2")
|
||||||
headSel2 <- select(OBH, contains("OrfRep"), matches("Gene"))
|
headSel2 <- select(OBH, contains("OrfRep"), matches("Gene"))
|
||||||
headSel2 <- select(headSel2, -"Gene.1", -"Gene.2")
|
headSel2 <- select(headSel2, -"Gene.1", -"Gene.2")
|
||||||
|
}
|
||||||
} else if (length(input_files) == 4) {
|
|
||||||
X1 <- read.csv(file = input_files[1], stringsAsFactors = FALSE) # exp1File,stringsAsFactors = FALSE)
|
if (length(input_files) >= 4) {
|
||||||
X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE) # exp2File,stringsAsFactors = FALSE)
|
X4 <- read.csv(file = input_files[4], stringsAsFactors = FALSE)
|
||||||
X3 <- read.csv(file = input_files[3], stringsAsFactors = FALSE) # exp3File,stringsAsFactors = FALSE)
|
|
||||||
X4 <- read.csv(file = input_files[4], stringsAsFactors = FALSE) # exp4File,stringsAsFactors = FALSE)
|
|
||||||
X <- join(X1, X2, by = "OrfRep")
|
|
||||||
X <- join(X, X3, by = "OrfRep")
|
|
||||||
X <- join(X, X4, by = "OrfRep")
|
X <- join(X, X4, by = "OrfRep")
|
||||||
OBH <- X[, order(colnames(X))] # OrderByHeader
|
OBH <- X[, order(colnames(X))] # OrderByHeader
|
||||||
headSel <- select(OBH, contains("OrfRep"), matches("Gene"),
|
headSel <- select(OBH, contains("OrfRep"), matches("Gene"),
|
||||||
@@ -103,7 +111,7 @@ REMcRdy <- select(headSel, contains("OrfRep"), matches("Gene"), contains("Z_lm_
|
|||||||
shiftOnly <- select(headSel, contains("OrfRep"), matches("Gene"), contains("Z_Shift"))
|
shiftOnly <- select(headSel, contains("OrfRep"), matches("Gene"), contains("Z_Shift"))
|
||||||
|
|
||||||
# Code to replace the numeric (.1 .2 .3) headers with experiment names from StudyInfo.csv
|
# Code to replace the numeric (.1 .2 .3) headers with experiment names from the study info CSV file
|
||||||
Labels <- read.csv(file = "../Code/StudyInfo.csv", stringsAsFactors = FALSE, sep = ",")
|
Labels <- read.csv(file = study_info, stringsAsFactors = FALSE, sep = ",")
|
||||||
|
|
||||||
# Using Text search grepl to relabel headers
|
# Using Text search grepl to relabel headers
|
||||||
REMcRdyHdr <- colnames(REMcRdy)
|
REMcRdyHdr <- colnames(REMcRdy)
|
||||||
@@ -168,10 +176,10 @@ Vec7 <- NA
|
|||||||
Vec8 <- NA
|
Vec8 <- NA
|
||||||
|
|
||||||
if (length(REMcRdy) == 6) {
|
if (length(REMcRdy) == 6) {
|
||||||
Vec1 <- abs(REMcRdy[, 3]) >= std
|
Vec1 <- abs(REMcRdy[, 3]) >= sd
|
||||||
Vec2 <- abs(REMcRdy[, 4]) >= std
|
Vec2 <- abs(REMcRdy[, 4]) >= sd
|
||||||
Vec3 <- abs(REMcRdy[, 5]) >= std
|
Vec3 <- abs(REMcRdy[, 5]) >= sd
|
||||||
Vec4 <- abs(REMcRdy[, 6]) >= std
|
Vec4 <- abs(REMcRdy[, 6]) >= sd
|
||||||
bolVec <- Vec1 | Vec2 | Vec3 | Vec4
|
bolVec <- Vec1 | Vec2 | Vec3 | Vec4
|
||||||
REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
|
REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
|
||||||
REMcRdyGT2[, 3:6] <- REMcRdy[bolVec, 3:6]
|
REMcRdyGT2[, 3:6] <- REMcRdy[bolVec, 3:6]
|
||||||
@@ -180,12 +188,12 @@ if (length(REMcRdy) == 6) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (length(REMcRdy) == 8) {
|
if (length(REMcRdy) == 8) {
|
||||||
Vec1 <- abs(REMcRdy[, 3]) >= std
|
Vec1 <- abs(REMcRdy[, 3]) >= sd
|
||||||
Vec2 <- abs(REMcRdy[, 4]) >= std
|
Vec2 <- abs(REMcRdy[, 4]) >= sd
|
||||||
Vec3 <- abs(REMcRdy[, 5]) >= std
|
Vec3 <- abs(REMcRdy[, 5]) >= sd
|
||||||
Vec4 <- abs(REMcRdy[, 6]) >= std
|
Vec4 <- abs(REMcRdy[, 6]) >= sd
|
||||||
Vec5 <- abs(REMcRdy[, 7]) >= std
|
Vec5 <- abs(REMcRdy[, 7]) >= sd
|
||||||
Vec6 <- abs(REMcRdy[, 8]) >= std
|
Vec6 <- abs(REMcRdy[, 8]) >= sd
|
||||||
bolVec <- Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6
|
bolVec <- Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6
|
||||||
REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
|
REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
|
||||||
REMcRdyGT2[, 3:8] <- REMcRdy[bolVec, 3:8]
|
REMcRdyGT2[, 3:8] <- REMcRdy[bolVec, 3:8]
|
||||||
@@ -194,14 +202,14 @@ if (length(REMcRdy) == 8) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (length(REMcRdy) == 10) {
|
if (length(REMcRdy) == 10) {
|
||||||
Vec1 <- abs(REMcRdy[, 3]) >= std
|
Vec1 <- abs(REMcRdy[, 3]) >= sd
|
||||||
Vec2 <- abs(REMcRdy[, 4]) >= std
|
Vec2 <- abs(REMcRdy[, 4]) >= sd
|
||||||
Vec3 <- abs(REMcRdy[, 5]) >= std
|
Vec3 <- abs(REMcRdy[, 5]) >= sd
|
||||||
Vec4 <- abs(REMcRdy[, 6]) >= std
|
Vec4 <- abs(REMcRdy[, 6]) >= sd
|
||||||
Vec5 <- abs(REMcRdy[, 7]) >= std
|
Vec5 <- abs(REMcRdy[, 7]) >= sd
|
||||||
Vec6 <- abs(REMcRdy[, 8]) >= std
|
Vec6 <- abs(REMcRdy[, 8]) >= sd
|
||||||
Vec7 <- abs(REMcRdy[, 9]) >= std
|
Vec7 <- abs(REMcRdy[, 9]) >= sd
|
||||||
Vec8 <- abs(REMcRdy[, 10]) >= std
|
Vec8 <- abs(REMcRdy[, 10]) >= sd
|
||||||
bolVec <- Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6 | Vec7 | Vec8
|
bolVec <- Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6 | Vec7 | Vec8
|
||||||
REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
|
REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
|
||||||
REMcRdyGT2[, 3:10] <- REMcRdy[bolVec, 3:10]
|
REMcRdyGT2[, 3:10] <- REMcRdy[bolVec, 3:10]
|
||||||
@@ -209,12 +217,12 @@ if (length(REMcRdy) == 10) {
|
|||||||
shiftOnlyGT2[, 3:10] <- shiftOnly[bolVec, 3:10]
|
shiftOnlyGT2[, 3:10] <- shiftOnly[bolVec, 3:10]
|
||||||
}
|
}
|
||||||
|
|
||||||
if (std != 0) {
|
if (sd != 0) {
|
||||||
REMcRdy <- REMcRdyGT2 # [,2:length(REMcRdyGT2)]
|
REMcRdy <- REMcRdyGT2 # [,2:length(REMcRdyGT2)]
|
||||||
shiftOnly <- shiftOnlyGT2 # [,2:length(shiftOnlyGT2)]
|
shiftOnly <- shiftOnlyGT2 # [,2:length(shiftOnlyGT2)]
|
||||||
}
|
}
|
||||||
|
|
||||||
if (std == 0) {
|
if (sd == 0) {
|
||||||
REMcRdy <- REMcRdy # [,2:length(REMcRdy)]
|
REMcRdy <- REMcRdy # [,2:length(REMcRdy)]
|
||||||
shiftOnly <- shiftOnly # [,2:length(shiftOnly)]
|
shiftOnly <- shiftOnly # [,2:length(shiftOnly)]
|
||||||
}
|
}
|
||||||
@@ -228,7 +236,7 @@ write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, qu
|
|||||||
#LabelStd <- read.table(file="./parameters.csv",stringsAsFactors = FALSE,sep = ",")
|
#LabelStd <- read.table(file="./parameters.csv",stringsAsFactors = FALSE,sep = ",")
|
||||||
|
|
||||||
LabelStd <- read.csv(file = study_info, stringsAsFactors = FALSE)
|
LabelStd <- read.csv(file = study_info, stringsAsFactors = FALSE)
|
||||||
print(std)
|
print(sd)
|
||||||
LabelStd[, 4] <- as.numeric(std)
|
LabelStd[, 4] <- as.numeric(sd)
|
||||||
write.csv(LabelStd, file = file.path(out_dir, "parameters.csv"), row.names = FALSE)
|
write.csv(LabelStd, file = file.path(out_dir, "parameters.csv"), row.names = FALSE)
|
||||||
write.csv(LabelStd, file = study_info, row.names = FALSE)
|
write.csv(LabelStd, file = study_info, row.names = FALSE)
|
||||||
|
|||||||
@@ -139,7 +139,7 @@ print_help() {
|
|||||||
# `--project`, `--module`, `--nomodule`, and `--wrapper` can be passed multiple times or with a comma-separated string
|
# `--project`, `--module`, `--nomodule`, and `--wrapper` can be passed multiple times or with a comma-separated string
|
||||||
# @option -p<value> | --project=<value> One or more projects to analyze, can be passed multiple times or with a comma-separated string
|
# @option -p<value> | --project=<value> One or more projects to analyze, can be passed multiple times or with a comma-separated string
|
||||||
# @option -m<value> | --module=<value> One or more modules to run (default: all), can be passed multiple times or with a comma-separated string
|
# @option -m<value> | --module=<value> One or more modules to run (default: all), can be passed multiple times or with a comma-separated string
|
||||||
# @option -w<value> | --wrapper=<value> Requires two arguments: the name of the wrapper and its arguments, can be passed multiple times
|
# @option -w<value> | --wrapper=<value> One or more wrappers and their arguments to run, can be passed multiple times or with a comma-separated string
|
||||||
# @option -n<value> | --nomodule=<value> One or more modules (default: none) to exclude from the analysis
|
# @option -n<value> | --nomodule=<value> One or more modules (default: none) to exclude from the analysis
|
||||||
# @option --markdown Generate the shdoc markdown file for this program
|
# @option --markdown Generate the shdoc markdown file for this program
|
||||||
# @option -y | --yes | --auto Assume yes answer to all questions (non-interactive mode)
|
# @option -y | --yes | --auto Assume yes answer to all questions (non-interactive mode)
|
||||||
@@ -163,20 +163,22 @@ parse_input() {
|
|||||||
case $1 in
|
case $1 in
|
||||||
--project|-p)
|
--project|-p)
|
||||||
shift
|
shift
|
||||||
|
declare -ga PROJECTS
|
||||||
IFS=',' read -ra PROJECTS <<< "$1"
|
IFS=',' read -ra PROJECTS <<< "$1"
|
||||||
;;
|
;;
|
||||||
--module|-m)
|
--module|-m)
|
||||||
shift
|
shift
|
||||||
|
declare -ga MODULES
|
||||||
IFS=',' read -ra MODULES <<< "$1"
|
IFS=',' read -ra MODULES <<< "$1"
|
||||||
;;
|
;;
|
||||||
--wrapper|-w)
|
--wrapper|-w)
|
||||||
shift
|
shift
|
||||||
IFS=',' read -ra WRAPPERS <<< "$1"
|
declare -ga WRAPPERS
|
||||||
shift
|
|
||||||
IFS=',' read -ra WRAPPERS <<< "$1"
|
IFS=',' read -ra WRAPPERS <<< "$1"
|
||||||
;;
|
;;
|
||||||
--nomodule|-n)
|
--nomodule|-n)
|
||||||
shift
|
shift
|
||||||
|
declare -ga EXCLUDE_MODULES
|
||||||
IFS=',' read -ra EXCLUDE_MODULES <<< "$1"
|
IFS=',' read -ra EXCLUDE_MODULES <<< "$1"
|
||||||
;;
|
;;
|
||||||
--markdown)
|
--markdown)
|
||||||
@@ -336,6 +338,10 @@ execute() {
|
|||||||
|
|
||||||
# @description Backup one or more files to an incremented .bk file
|
# @description Backup one or more files to an incremented .bk file
|
||||||
#
|
#
|
||||||
|
# **TODO**
|
||||||
|
#
|
||||||
|
# * Make backups hidden by prepending "."?
|
||||||
|
#
|
||||||
# @exitcode backup iterator max 255
|
# @exitcode backup iterator max 255
|
||||||
# @internal
|
# @internal
|
||||||
backup() {
|
backup() {
|
||||||
@@ -343,8 +349,8 @@ backup() {
|
|||||||
for f in "$@"; do
|
for f in "$@"; do
|
||||||
[[ -e $f ]] || continue
|
[[ -e $f ]] || continue
|
||||||
count=1
|
count=1
|
||||||
while [[ -f $f.bk.$count ]]; do
|
while [[ -e $f.bk.$count ]]; do
|
||||||
count=$((count++))
|
((count++))
|
||||||
done
|
done
|
||||||
echo "Backing up $f to $f.bk.$count"
|
echo "Backing up $f to $f.bk.$count"
|
||||||
debug "rsync -a $f $f.bk.$count"
|
debug "rsync -a $f $f.bk.$count"
|
||||||
@@ -525,7 +531,7 @@ interactive_header() {
|
|||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
# Module selection
|
# Module selection
|
||||||
if [[ ${#MODULES[@]} -eq 0 && ${#EXCLUDE_MODULES[@]} -eq 0 ]]; then
|
if [[ ${#MODULES[@]} -eq 0 && ${#EXCLUDE_MODULES[@]} -eq 0 && ${#WRAPPERS[@]} -eq 0 ]]; then
|
||||||
cat <<-EOF
|
cat <<-EOF
|
||||||
${underline}Enter modules(s) to run${nounderline}
|
${underline}Enter modules(s) to run${nounderline}
|
||||||
* <Enter> for all
|
* <Enter> for all
|
||||||
@@ -554,7 +560,7 @@ interactive_header() {
|
|||||||
# If we're just installing dependencies, skip the rest
|
# If we're just installing dependencies, skip the rest
|
||||||
[[ ${MODULES[*]} == "install_dependencies" ]] && return 0
|
[[ ${MODULES[*]} == "install_dependencies" ]] && return 0
|
||||||
|
|
||||||
# Submodule selection
|
# Wrapper selection
|
||||||
if [[ ${#MODULES[@]} -eq 0 && ${#EXCLUDE_MODULES[@]} -eq 0 && ${#WRAPPERS[@]} -eq 0 ]]; then
|
if [[ ${#MODULES[@]} -eq 0 && ${#EXCLUDE_MODULES[@]} -eq 0 && ${#WRAPPERS[@]} -eq 0 ]]; then
|
||||||
while :; do
|
while :; do
|
||||||
cat <<-EOF
|
cat <<-EOF
|
||||||
@@ -1335,7 +1341,7 @@ qhtcp() {
|
|||||||
[[ -d $QHTCP_RESULTS_DIR ]] ||
|
[[ -d $QHTCP_RESULTS_DIR ]] ||
|
||||||
err "$QHTCP_RESULTS_DIR does not exist, have you run the init_project module?"
|
err "$QHTCP_RESULTS_DIR does not exist, have you run the init_project module?"
|
||||||
|
|
||||||
# Sets STUDIES_NUMS and STUDIES_DIRS
|
# Sets STUDIES
|
||||||
study_info
|
study_info
|
||||||
|
|
||||||
choose_easy_results "$EASY_OUT_DIR"
|
choose_easy_results "$EASY_OUT_DIR"
|
||||||
@@ -1348,22 +1354,24 @@ qhtcp() {
|
|||||||
# # TODO Add them all to StudiesDataArchive?
|
# # TODO Add them all to StudiesDataArchive?
|
||||||
# # Probably better to always add and remove dupes later since each invocation "counts"?
|
# # Probably better to always add and remove dupes later since each invocation "counts"?
|
||||||
# for f in "${EASY_RESULTS_FILES[@]}"; do
|
# for f in "${EASY_RESULTS_FILES[@]}"; do
|
||||||
# for s in "${STUDIES_NUMS[@]}"; do
|
# for study in "${STUDIES[@]}"; do
|
||||||
|
# read -r num sd dir <<< "$study"
|
||||||
# # Trying to match old ExpFrontend formatting
|
# # Trying to match old ExpFrontend formatting
|
||||||
# printf "%s\t" \
|
# printf "%s\t" \
|
||||||
# "${DATE//_/}" "$PROJECT_NAME" "$QHTCP_RESULTS_DIR" "Exp$s" \
|
# "${DATE//_/}" "$PROJECT_NAME" "$QHTCP_RESULTS_DIR" "Exp$num" \
|
||||||
# "$PROJECT_DATE" "$PROJECT_SCANS_DIR" "$EASY_RESULTS_DIR" "${f##*/}" \
|
# "$PROJECT_DATE" "$PROJECT_SCANS_DIR" "$EASY_RESULTS_DIR" "${f##*/}" \
|
||||||
# >> "$STUDIES_ARCHIVE_FILE"
|
# >> "$STUDIES_ARCHIVE_FILE"
|
||||||
# done
|
# done
|
||||||
# done
|
# done
|
||||||
|
|
||||||
# Run R interactions script on all studies
|
# Run R interactions script on all studies
|
||||||
for s in "${STUDIES_NUMS[@]}"; do
|
for study in "${STUDIES[@]}"; do
|
||||||
[[ -d $QHTCP_RESULTS_DIR/Exp$s/zscores ]] ||
|
read -r num sd dir <<< "$study"
|
||||||
execute mkdir "$QHTCP_RESULTS_DIR/Exp$s/zscores"
|
[[ -d $dir/zscores ]] ||
|
||||||
[[ -d $QHTCP_RESULTS_DIR/Exp$s/zscores/qc ]] ||
|
execute mkdir "$dir/zscores"
|
||||||
execute mkdir "$QHTCP_RESULTS_DIR/Exp$s/zscores/qc"
|
[[ -d $dir/zscores/qc ]] ||
|
||||||
r_interactions "$s"
|
execute mkdir "$dir/zscores/qc"
|
||||||
|
r_interactions "$num" "$sd"
|
||||||
done \
|
done \
|
||||||
&& remc \
|
&& remc \
|
||||||
&& gtf \
|
&& gtf \
|
||||||
@@ -1384,13 +1392,12 @@ module remc
|
|||||||
remc() {
|
remc() {
|
||||||
debug "Running: ${FUNCNAME[0]}"
|
debug "Running: ${FUNCNAME[0]}"
|
||||||
|
|
||||||
# Sets STUDIES_NUMS and STUDIES_DIRS
|
# Sets STUDIES
|
||||||
study_info
|
study_info
|
||||||
|
|
||||||
# If any wrappers fail the rest will not run, this is fundamental to module design
|
# If any wrappers fail the rest will not run, this is fundamental to module design
|
||||||
# Remove leading && to run regardless
|
# Remove leading && to run regardless
|
||||||
r_join_interactions \
|
r_join_interactions \
|
||||||
"${STUDIES_DIRS[@]}" \
|
|
||||||
&& java_extract \
|
&& java_extract \
|
||||||
&& r_add_shift_values \
|
&& r_add_shift_values \
|
||||||
&& r_create_heat_maps \
|
&& r_create_heat_maps \
|
||||||
@@ -1453,36 +1460,40 @@ module gta
|
|||||||
gta() {
|
gta() {
|
||||||
debug "Running: ${FUNCNAME[0]}"
|
debug "Running: ${FUNCNAME[0]}"
|
||||||
|
|
||||||
gene_association_sgd="${2:-"$APPS_DIR/r/gene_association.sgd"}"
|
# gene_association_sgd="${2:-"$APPS_DIR/r/gene_association.sgd"}"
|
||||||
gene_ontology_obo="${3:-"$APPS_DIR/r/gene_ontology_edit.obo"}"
|
gene_ontology_obo="${3:-"$APPS_DIR/r/gene_ontology_edit.obo"}"
|
||||||
sgd_terms_tfile="${4:-"$APPS_DIR/r/go_terms.tab"}"
|
sgd_terms_tfile="${4:-"$APPS_DIR/r/go_terms.tab"}"
|
||||||
all_sgd_terms_csv="${5:-"$APPS_DIR/r/All_SGD_GOTerms_for_QHTCPtk.csv"}"
|
all_sgd_terms_csv="${5:-"$APPS_DIR/r/All_SGD_GOTerms_for_QHTCPtk.csv"}"
|
||||||
|
|
||||||
# TODO This could be wrong, it could be in main results
|
# TODO This could be wrong, it could be in main results
|
||||||
|
|
||||||
# Sets STUDIES_NUMS and STUDIES_DIRS
|
# Sets STUDIES
|
||||||
study_info
|
study_info
|
||||||
|
|
||||||
[[ -d $GTA_OUT_DIR ]] && backup "$GTA_OUT_DIR"
|
[[ -d $GTA_OUT_DIR ]] && backup "$GTA_OUT_DIR"
|
||||||
execute mkdir "$GTA_OUT_DIR"
|
execute mkdir "$GTA_OUT_DIR"
|
||||||
|
|
||||||
# Loop over the array and create pairwise arrays
|
# Loop over the array and create pairwise arrays
|
||||||
for ((i=0; i<${#STUDIES_NUMS[@]}; i++)); do
|
for ((i=0; i<${#STUDIES[@]}; i++)); do
|
||||||
for ((j=i+1; j<${#STUDIES_NUMS[@]}; j++)); do
|
for ((j=i+1; j<${#STUDIES[@]}; j++)); do
|
||||||
pair=("${STUDIES_NUMS[i]}" "${STUDIES_NUMS[j]}")
|
read -r num1 _ _ <<< "${STUDIES[i]}"
|
||||||
|
read -r num2 _ _ <<< "${STUDIES[j]}"
|
||||||
|
pair=("$num1" "$num2")
|
||||||
echo "${pair[@]}"
|
echo "${pair[@]}"
|
||||||
done
|
done
|
||||||
done
|
done
|
||||||
|
|
||||||
# Create unique pairwise combinations of study nums from dir names
|
# Create unique pairwise combinations of study nums from dir names
|
||||||
study_combos=()
|
study_combos=()
|
||||||
for ((i=0; i<${#STUDIES_NUMS[@]}; i++)); do
|
for ((i=0; i<${#STUDIES[@]}; i++)); do
|
||||||
# Loop through the array again
|
# Loop through the array again
|
||||||
for ((j=0; j<${#STUDIES_NUMS[@]}; j++)); do
|
for ((j=0; j<${#STUDIES[@]}; j++)); do
|
||||||
# If the indices are not the same
|
# If the indices are not the same
|
||||||
if [ "$i" != "$j" ]; then
|
if [ "$i" != "$j" ]; then
|
||||||
# Print the unique combination
|
# Print the unique combination
|
||||||
study_combos+=("${STUDIES_NUMS[$i]},${STUDIES_NUMS[$j]}")
|
read -r num1 _ _ <<< "${STUDIES[i]}"
|
||||||
|
read -r num2 _ _ <<< "${STUDIES[j]}"
|
||||||
|
study_combos+=("$num1,$num2")
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
done
|
done
|
||||||
@@ -1490,11 +1501,12 @@ gta() {
|
|||||||
# The following are three types of studies
|
# The following are three types of studies
|
||||||
|
|
||||||
# Individual studies
|
# Individual studies
|
||||||
for s in "${STUDIES_NUMS[@]}"; do
|
for study in "${STUDIES[@]}"; do
|
||||||
zscores_file="$QHTCP_RESULTS_DIR/Exp$s/$zscores_file"
|
read -r num _ dir <<< "$study"
|
||||||
|
zscores_file="$dir/zscores/zscores_interaction.csv"
|
||||||
if [[ -f $zscores_file ]]; then
|
if [[ -f $zscores_file ]]; then
|
||||||
mkdir "$GTA_OUT_DIR/Exp$s"
|
mkdir "$GTA_OUT_DIR/Exp$num"
|
||||||
r_gta "Exp$s" "$zscores_file"
|
r_gta "Exp$num" "$zscores_file"
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
@@ -1507,6 +1519,12 @@ gta() {
|
|||||||
|
|
||||||
# All studies
|
# All studies
|
||||||
# All preceding arguments are required so we can pass multiple studies
|
# All preceding arguments are required so we can pass multiple studies
|
||||||
|
declare -a nums
|
||||||
|
for study in "${STUDIES[@]}"; do
|
||||||
|
read -r num _ _ <<< "$study"
|
||||||
|
nums+=("$num")
|
||||||
|
done
|
||||||
|
|
||||||
r_gta_heatmaps \
|
r_gta_heatmaps \
|
||||||
"$STUDY_INFO_FILE" \
|
"$STUDY_INFO_FILE" \
|
||||||
"$gene_ontology_obo" \
|
"$gene_ontology_obo" \
|
||||||
@@ -1514,7 +1532,7 @@ gta() {
|
|||||||
"$all_sgd_terms_csv" \
|
"$all_sgd_terms_csv" \
|
||||||
"$QHTCP_RESULTS_DIR" \
|
"$QHTCP_RESULTS_DIR" \
|
||||||
"$QHTCP_RESULTS_DIR/TermSpecificHeatmaps" \
|
"$QHTCP_RESULTS_DIR/TermSpecificHeatmaps" \
|
||||||
"${STUDIES_NUMS[@]}"
|
"${nums[@]}"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -1714,7 +1732,7 @@ wrapper r_interactions
|
|||||||
# @arg $3 string study info file
|
# @arg $3 string study info file
|
||||||
# @arg $4 string SGD_features.tab
|
# @arg $4 string SGD_features.tab
|
||||||
# @arg $5 string easy/results_std.txt
|
# @arg $5 string easy/results_std.txt
|
||||||
# @arg $6 string zscores directory
|
# @arg $6 string output directory
|
||||||
r_interactions() {
|
r_interactions() {
|
||||||
debug "Running: ${FUNCNAME[0]} $*"
|
debug "Running: ${FUNCNAME[0]} $*"
|
||||||
cat <<-EOF
|
cat <<-EOF
|
||||||
@@ -1729,7 +1747,11 @@ r_interactions() {
|
|||||||
* Background values are reported in the results sheet and so could also be analyzed there.
|
* Background values are reported in the results sheet and so could also be analyzed there.
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
script="$APPS_DIR/r/interactions.R"
|
declare script="$APPS_DIR/r/interactions.R"
|
||||||
|
declare out_dir="${6:-"$QHTCP_RESULTS_DIR/Exp$1/zscores"}"
|
||||||
|
|
||||||
|
[[ -d $out_dir ]] && backup "$out_dir"
|
||||||
|
mkdir "$out_dir"
|
||||||
|
|
||||||
execute "$RSCRIPT" "$script" \
|
execute "$RSCRIPT" "$script" \
|
||||||
"$1" \
|
"$1" \
|
||||||
@@ -1737,12 +1759,15 @@ r_interactions() {
|
|||||||
"${3:-"$STUDY_INFO_FILE"}" \
|
"${3:-"$STUDY_INFO_FILE"}" \
|
||||||
"${4:-"$APPS_DIR/r/SGD_features.tab"}" \
|
"${4:-"$APPS_DIR/r/SGD_features.tab"}" \
|
||||||
"${5:-"$EASY_RESULTS_DIR/results_std.txt"}" \
|
"${5:-"$EASY_RESULTS_DIR/results_std.txt"}" \
|
||||||
"${6:-"$QHTCP_RESULTS_DIR/Exp$1/zscores"}" \
|
"$out_dir" \
|
||||||
"${@:7}" # future arguments
|
"${@:7}" # future arguments
|
||||||
|
|
||||||
|
[[ -f "$out_dir/zscores_interaction.csv" ]] || (echo "$out_dir/zscores_interaction.csv does not exist"; return 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
wrapper r_join_interactions
|
wrapper r_join_interactions
|
||||||
|
# shellcheck disable=SC2120
|
||||||
# @description JoinInteractExps3dev.R creates REMcRdy_lm_only.csv and Shift_only.csv
|
# @description JoinInteractExps3dev.R creates REMcRdy_lm_only.csv and Shift_only.csv
|
||||||
#
|
#
|
||||||
# TODO
|
# TODO
|
||||||
@@ -1760,20 +1785,41 @@ wrapper r_join_interactions
|
|||||||
# * Shift_only.csv
|
# * Shift_only.csv
|
||||||
# * parameters.csv
|
# * parameters.csv
|
||||||
#
|
#
|
||||||
# @arg $1 string output directory
|
# @arg $1 string output directory (required)
|
||||||
# @arg $2 string sd value (default: 2)
|
# @arg $2 string sd value (default: 2) (required)
|
||||||
# @arg $3 string study info file
|
# @arg $3 string study info file (required)
|
||||||
|
# @arg $4 array studies (required)
|
||||||
r_join_interactions() {
|
r_join_interactions() {
|
||||||
debug "Running: ${FUNCNAME[0]} $*"
|
debug "Running: ${FUNCNAME[0]} $*"
|
||||||
script="$APPS_DIR/r/joinInteractExps.R"
|
declare script="$APPS_DIR/r/joinInteractExps.R"
|
||||||
|
declare -a dirs
|
||||||
|
declare -a out_files=(
|
||||||
|
"${1:-$QHTCP_RESULTS_DIR}/REMcRdy_lm_only.csv"
|
||||||
|
"${1:-$QHTCP_RESULTS_DIR}/Shift_only.csv"
|
||||||
|
"${1:-$QHTCP_RESULTS_DIR}/parameters.csv"
|
||||||
|
)
|
||||||
|
|
||||||
|
((DEBUG)) && declare -p
|
||||||
|
|
||||||
|
backup "${out_files[@]}"
|
||||||
|
|
||||||
|
# If user provides study dirs, use those
|
||||||
|
if [[ $# -gt 3 ]]; then
|
||||||
|
dirs=("${@:4}")
|
||||||
|
else
|
||||||
|
study_info
|
||||||
|
for study in "${STUDIES[@]}"; do
|
||||||
|
read -r _ _ dir <<< "$study"
|
||||||
|
dirs+=("$dir")
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
execute "$RSCRIPT" "$script" \
|
execute "$RSCRIPT" "$script" \
|
||||||
"${1:-$QHTCP_RESULTS_DIR}" \
|
"${1:-$QHTCP_RESULTS_DIR}" \
|
||||||
"${2:-2}" \
|
"${2:-2}" \
|
||||||
"${3:-$STUDY_INFO_FILE}" \
|
"${3:-$STUDY_INFO_FILE}" \
|
||||||
"${@:4:-${STUDIES_DIRS[@]}}"
|
"${dirs[@]}"
|
||||||
|
|
||||||
local out_files=("$1/REMcRdy_lm_only.csv" "$1/Shift_only.csv" "$1/parameters.csv")
|
|
||||||
for f in "${out_files[@]}"; do
|
for f in "${out_files[@]}"; do
|
||||||
[[ -f $f ]] || (echo "$f does not exist"; return 1)
|
[[ -f $f ]] || (echo "$f does not exist"; return 1)
|
||||||
done
|
done
|
||||||
@@ -1816,6 +1862,9 @@ java_extract() {
|
|||||||
"${2:-"$QHTCP_RESULTS_DIR/REMcRdy_lm_only.csv"}"
|
"${2:-"$QHTCP_RESULTS_DIR/REMcRdy_lm_only.csv"}"
|
||||||
"${3:-"$APPS_DIR/java/GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab"}"
|
"${3:-"$APPS_DIR/java/GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab"}"
|
||||||
"${4:-"$APPS_DIR/java/ORF_List_Without_DAmPs.txt"}"
|
"${4:-"$APPS_DIR/java/ORF_List_Without_DAmPs.txt"}"
|
||||||
|
1
|
||||||
|
true
|
||||||
|
true
|
||||||
)
|
)
|
||||||
|
|
||||||
debug "pushd && ${java_cmd[*]} && popd"
|
debug "pushd && ${java_cmd[*]} && popd"
|
||||||
@@ -2021,8 +2070,7 @@ r_compile_gtf() {
|
|||||||
#
|
#
|
||||||
# @exitcode 0 If one or more studies found
|
# @exitcode 0 If one or more studies found
|
||||||
# @exitcode 1 If no studies found
|
# @exitcode 1 If no studies found
|
||||||
# @set STUDIES_NUMS array contains Exp numbers
|
# @set STUDIES array contains strings of the form "Exp# sd ExpDir"
|
||||||
# @set STUDIES_DIRS array contains Exp directories
|
|
||||||
study_info() {
|
study_info() {
|
||||||
debug "Running: ${FUNCNAME[0]}"
|
debug "Running: ${FUNCNAME[0]}"
|
||||||
|
|
||||||
@@ -2116,26 +2164,28 @@ study_info() {
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
# Read study info file
|
# Read study info file
|
||||||
while IFS=',' read -r col1 _; do # split on comma, get Exp # from 1st column
|
declare -ga STUDIES
|
||||||
STUDIES_NUMS+=("$col1")
|
while IFS=',' read -r num _ sd _; do
|
||||||
|
STUDIES+=("$num $sd $QHTCP_RESULTS_DIR/Exp$num")
|
||||||
done < <(tail -n +2 "$STUDY_INFO_FILE") # skip header
|
done < <(tail -n +2 "$STUDY_INFO_FILE") # skip header
|
||||||
|
|
||||||
# Initialize missing Exp dirs
|
# Initialize missing Exp dirs
|
||||||
STUDIES_DIRS=()
|
for study in "${STUDIES[@]}"; do
|
||||||
for s in "${STUDIES_NUMS[@]}"; do
|
read -r _ _ dir <<< "$study"
|
||||||
study_dir="$QHTCP_RESULTS_DIR/Exp$s"
|
[[ -d $dir ]] || mkdir "$dir"
|
||||||
STUDIES_DIRS+=("$study_dir")
|
|
||||||
[[ -d $study_dir ]] || mkdir "$study_dir"
|
|
||||||
|
|
||||||
# We don't need a template anymore?
|
|
||||||
# if ! rsync --archive "$STUDY_TEMPLATE_DIR" "$STUDY_DIR"; then
|
|
||||||
# err "Could not copy $STUDY_TEMPLATE_DIR template to $STUDY_DIR"
|
|
||||||
# continue
|
|
||||||
# fi
|
|
||||||
done
|
done
|
||||||
|
|
||||||
|
# # We don't need a template anymore?
|
||||||
|
# # if ! rsync --archive "$STUDY_TEMPLATE_DIR" "$STUDY_DIR"; then
|
||||||
|
# # err "Could not copy $STUDY_TEMPLATE_DIR template to $STUDY_DIR"
|
||||||
|
# # continue
|
||||||
|
# # fi
|
||||||
|
# done
|
||||||
|
|
||||||
|
((DEBUG)) && declare -p STUDIES
|
||||||
|
|
||||||
# Return true if at least one study was found
|
# Return true if at least one study was found
|
||||||
[[ ${#STUDIES_NUMS[@]} -gt 0 ]]
|
[[ ${#STUDIES[@]} -gt 0 ]]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -2287,6 +2337,8 @@ main() {
|
|||||||
|
|
||||||
parse_input "$@" # parse arguments with getopt
|
parse_input "$@" # parse arguments with getopt
|
||||||
|
|
||||||
|
# ((DEBUG)) && declare -p
|
||||||
|
|
||||||
interactive_header "$@"
|
interactive_header "$@"
|
||||||
|
|
||||||
# # Prompt user for the PROJECT if we still don't have one
|
# # Prompt user for the PROJECT if we still don't have one
|
||||||
@@ -2345,14 +2397,8 @@ main() {
|
|||||||
declare -gx GTA_OUT_DIR="$QHTCP_RESULTS_DIR/gta"
|
declare -gx GTA_OUT_DIR="$QHTCP_RESULTS_DIR/gta"
|
||||||
declare -gx GTF_OUT_DIR="$QHTCP_RESULTS_DIR/gtf"
|
declare -gx GTF_OUT_DIR="$QHTCP_RESULTS_DIR/gtf"
|
||||||
declare -gx R_LIBS_USER=${R_LIBS_USER:-"$HOME/R/$SCRIPT_NAME"}
|
declare -gx R_LIBS_USER=${R_LIBS_USER:-"$HOME/R/$SCRIPT_NAME"}
|
||||||
if ((DEBUG)); then
|
|
||||||
echo "Debug:"
|
# ((DEBUG)) && declare -p
|
||||||
declare -p SCANS_DIR OUT_DIR TEMPLATES_DIR APPS_DIR \
|
|
||||||
PROJECTS PROJECT_NAME \
|
|
||||||
PROJECT_SCANS_DIR PROJECT_DATE PROJECT_SUFFIX PROJECT_USER \
|
|
||||||
STUDIES_ARCHIVE_FILE QHTCP_RESULTS_DIR QHTCP_TEMPLATE_DIR \
|
|
||||||
STUDY_INFO_FILE EASY_RESULTS_DIR R_LIBS_USER
|
|
||||||
fi
|
|
||||||
|
|
||||||
debug "Active modules: ${MODULES[*]}"
|
debug "Active modules: ${MODULES[*]}"
|
||||||
debug "Active wrappers and their args: ${WRAPPERS[*]}"
|
debug "Active wrappers and their args: ${WRAPPERS[*]}"
|
||||||
@@ -2365,21 +2411,19 @@ main() {
|
|||||||
done
|
done
|
||||||
|
|
||||||
# Run selected wrappers
|
# Run selected wrappers
|
||||||
for i in "${!WRAPPERS[@]}"; do
|
for wrapper in "${WRAPPERS[@]}"; do
|
||||||
IFS=',' read -ra args <<< "${WRAPPERS[$((i+1))]}" # load the command args
|
IFS=',' read -ra args <<< "$wrapper" # load the command args
|
||||||
if ask "Run ${WRAPPERS[i]} wrapper with args ${args[*]}?"; then
|
if ask "Run ${args[0]} wrapper with args ${args[*]:1}?"; then
|
||||||
"${WRAPPERS[i]}" "${args[@]}" || return 1
|
"${args[0]}" "${args[@]:1}" || return 1
|
||||||
fi
|
fi
|
||||||
continue 2 # skip the command string
|
|
||||||
done
|
done
|
||||||
done
|
done
|
||||||
|
|
||||||
cat <<-EOF
|
[[ ${#MODULES[@]} -gt 0 ]] && echo "Successfully ran module(s): ${MODULES[*]}"
|
||||||
Successfully ran module(s): ${MODULES[*]}
|
[[ ${#WRAPPERS[@]} -gt 0 ]] && echo "Successfully ran wrapper(s): ${WRAPPERS[*]}"
|
||||||
And wrapper(s): ${WRAPPERS[*]}
|
[[ ${#PROJECTS[@]} -gt 0 ]] && echo "On project(s): ${PROJECTS[*]}"
|
||||||
On project(s): ${PROJECTS[*]}
|
|
||||||
EOF
|
unset MODULES WRAPPERS EXCLUDE_MODULES STUDIES SET_STUDIES YES
|
||||||
unset MODULES WRAPPERS EXCLUDE_MODULES STUDIES_NUMS STUDIES_DIRS SET_STUDIES YES
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# (Safe) main loop
|
# (Safe) main loop
|
||||||
|
|||||||
Reference in New Issue
Block a user