Rollup before removing NAs from joinInteractExps.R

This commit is contained in:
2024-08-15 15:02:53 -04:00
parent 6992d5eec0
commit 38b3f66695
3 changed files with 169 additions and 117 deletions

View File

@@ -27,7 +27,7 @@ sgd_gene_list <- file.path(args[4])
input_file <- file.path(args[5]) input_file <- file.path(args[5])
out_dir <- file.path(args[6]) out_dir <- file.path(args[6])
sprintf("The Standard Deviation value is: %f", delta_bg_factor) sprintf("The Standard Deviation value is: %d", delta_bg_factor)
out_dir_qc <- file.path(out_dir, "qc") out_dir_qc <- file.path(out_dir, "qc")

View File

@@ -21,7 +21,7 @@ if (length(args) >= 2) {
sd <- 2 # default value sd <- 2 # default value
} }
sprintf("SD value is: %f", sd) sprintf("SD value is: %d", sd)
# Set study_info file # Set study_info file
if (length(args) >= 3) { if (length(args) >= 3) {
@@ -30,15 +30,23 @@ if (length(args) >= 3) {
study_info <- "../Code/StudyInfo.csv" # for legacy workflow study_info <- "../Code/StudyInfo.csv" # for legacy workflow
} }
studies <- args[3:length(args)] studies <- args[4:length(args)]
print(studies)
input_files <- c() input_files <- c()
for (study in 1:length(studies)) { for (i in seq_along(studies)) {
study <- studies[i]
zs_file <- file.path(study, "zscores", "zscores_interaction.csv") zs_file <- file.path(study, "zscores", "zscores_interaction.csv")
if (file.exists(zs_file)) { if (file.exists(zs_file)) {
input_files[study] <- zs_file input_files[i] <- zs_file
} }
} }
rm(zs_file, study)
for (var in ls()) {
print(paste(var, ":", get(var)))
}
print(input_files)
print(length(input_files)) print(length(input_files))
# TODO this is better handled in a loop in case you want to compare more experiments? # TODO this is better handled in a loop in case you want to compare more experiments?
@@ -47,7 +55,12 @@ print(length(input_files))
# Join the two files at a time as a function of how many inputFile # Join the two files at a time as a function of how many inputFile
# list the larger file first ? in this example X2 has the larger number of genes # list the larger file first ? in this example X2 has the larger number of genes
# If X1 has a larger number of genes, switch the order of X1 and X2 # If X1 has a larger number of genes, switch the order of X1 and X2
if (length(input_files) == 2) { if (length(input_files) == 1) {
print("Only one experiment to compare, skipping join")
stop("Exiting script")
}
if (length(input_files) >= 2) {
X1 <- read.csv(file = input_files[1], stringsAsFactors = FALSE) X1 <- read.csv(file = input_files[1], stringsAsFactors = FALSE)
X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE) X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE)
X <- join(X1, X2, by = "OrfRep") X <- join(X1, X2, by = "OrfRep")
@@ -57,11 +70,10 @@ if (length(input_files) == 2) {
headSel <- select(headSel, -"Gene.1") # remove "Gene.1 column headSel <- select(headSel, -"Gene.1") # remove "Gene.1 column
headSel2 <- select(OBH, contains("OrfRep"), matches("Gene")) #Frame for interleaving Z_lm with Shift colums headSel2 <- select(OBH, contains("OrfRep"), matches("Gene")) #Frame for interleaving Z_lm with Shift colums
headSel2 <- select(headSel2, -"Gene.1") # remove "Gene.1 column #Frame for interleaving Z_lm with Shift colums headSel2 <- select(headSel2, -"Gene.1") # remove "Gene.1 column #Frame for interleaving Z_lm with Shift colums
} else if (length(input_files) == 3) { }
X1 <- read.csv(file = input_files[1], stringsAsFactors = FALSE) # exp1File,stringsAsFactors = FALSE)
X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE) # exp2File,stringsAsFactors = FALSE) if (length(input_files) >= 3) {
X3 <- read.csv(file = input_files[3], stringsAsFactors = FALSE) # exp3File,stringsAsFactors = FALSE) X3 <- read.csv(file = input_files[3], stringsAsFactors = FALSE)
X <- join(X1, X2, by = "OrfRep")
X <- join(X, X3, by = "OrfRep") X <- join(X, X3, by = "OrfRep")
OBH <- X[, order(colnames(X))] # OrderByHeader OBH <- X[, order(colnames(X))] # OrderByHeader
headSel <- select(OBH, contains("OrfRep"), matches("Gene"), headSel <- select(OBH, contains("OrfRep"), matches("Gene"),
@@ -69,14 +81,10 @@ if (length(input_files) == 2) {
headSel <- select(headSel, -"Gene.1", -"Gene.2") headSel <- select(headSel, -"Gene.1", -"Gene.2")
headSel2 <- select(OBH, contains("OrfRep"), matches("Gene")) headSel2 <- select(OBH, contains("OrfRep"), matches("Gene"))
headSel2 <- select(headSel2, -"Gene.1", -"Gene.2") headSel2 <- select(headSel2, -"Gene.1", -"Gene.2")
}
} else if (length(input_files) == 4) { if (length(input_files) >= 4) {
X1 <- read.csv(file = input_files[1], stringsAsFactors = FALSE) # exp1File,stringsAsFactors = FALSE) X4 <- read.csv(file = input_files[4], stringsAsFactors = FALSE)
X2 <- read.csv(file = input_files[2], stringsAsFactors = FALSE) # exp2File,stringsAsFactors = FALSE)
X3 <- read.csv(file = input_files[3], stringsAsFactors = FALSE) # exp3File,stringsAsFactors = FALSE)
X4 <- read.csv(file = input_files[4], stringsAsFactors = FALSE) # exp4File,stringsAsFactors = FALSE)
X <- join(X1, X2, by = "OrfRep")
X <- join(X, X3, by = "OrfRep")
X <- join(X, X4, by = "OrfRep") X <- join(X, X4, by = "OrfRep")
OBH <- X[, order(colnames(X))] # OrderByHeader OBH <- X[, order(colnames(X))] # OrderByHeader
headSel <- select(OBH, contains("OrfRep"), matches("Gene"), headSel <- select(OBH, contains("OrfRep"), matches("Gene"),
@@ -103,7 +111,7 @@ REMcRdy <- select(headSel, contains("OrfRep"), matches("Gene"), contains("Z_lm_
shiftOnly <- select(headSel, contains("OrfRep"), matches("Gene"), contains("Z_Shift")) shiftOnly <- select(headSel, contains("OrfRep"), matches("Gene"), contains("Z_Shift"))
# Code to replace the numeric (.1 .2 .3) headers with experiment names from StudyInfo.txt # Code to replace the numeric (.1 .2 .3) headers with experiment names from StudyInfo.txt
Labels <- read.csv(file = "../Code/StudyInfo.csv", stringsAsFactors = FALSE, sep = ",") Labels <- read.csv(file = study_info, stringsAsFactors = FALSE, sep = ",")
# Using Text search grepl to relabel headers # Using Text search grepl to relabel headers
REMcRdyHdr <- colnames(REMcRdy) REMcRdyHdr <- colnames(REMcRdy)
@@ -168,10 +176,10 @@ Vec7 <- NA
Vec8 <- NA Vec8 <- NA
if (length(REMcRdy) == 6) { if (length(REMcRdy) == 6) {
Vec1 <- abs(REMcRdy[, 3]) >= std Vec1 <- abs(REMcRdy[, 3]) >= sd
Vec2 <- abs(REMcRdy[, 4]) >= std Vec2 <- abs(REMcRdy[, 4]) >= sd
Vec3 <- abs(REMcRdy[, 5]) >= std Vec3 <- abs(REMcRdy[, 5]) >= sd
Vec4 <- abs(REMcRdy[, 6]) >= std Vec4 <- abs(REMcRdy[, 6]) >= sd
bolVec <- Vec1 | Vec2 | Vec3 | Vec4 bolVec <- Vec1 | Vec2 | Vec3 | Vec4
REMcRdyGT2 <- REMcRdy[bolVec, 1:2] REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
REMcRdyGT2[, 3:6] <- REMcRdy[bolVec, 3:6] REMcRdyGT2[, 3:6] <- REMcRdy[bolVec, 3:6]
@@ -180,12 +188,12 @@ if (length(REMcRdy) == 6) {
} }
if (length(REMcRdy) == 8) { if (length(REMcRdy) == 8) {
Vec1 <- abs(REMcRdy[, 3]) >= std Vec1 <- abs(REMcRdy[, 3]) >= sd
Vec2 <- abs(REMcRdy[, 4]) >= std Vec2 <- abs(REMcRdy[, 4]) >= sd
Vec3 <- abs(REMcRdy[, 5]) >= std Vec3 <- abs(REMcRdy[, 5]) >= sd
Vec4 <- abs(REMcRdy[, 6]) >= std Vec4 <- abs(REMcRdy[, 6]) >= sd
Vec5 <- abs(REMcRdy[, 7]) >= std Vec5 <- abs(REMcRdy[, 7]) >= sd
Vec6 <- abs(REMcRdy[, 8]) >= std Vec6 <- abs(REMcRdy[, 8]) >= sd
bolVec <- Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6 bolVec <- Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6
REMcRdyGT2 <- REMcRdy[bolVec, 1:2] REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
REMcRdyGT2[, 3:8] <- REMcRdy[bolVec, 3:8] REMcRdyGT2[, 3:8] <- REMcRdy[bolVec, 3:8]
@@ -194,14 +202,14 @@ if (length(REMcRdy) == 8) {
} }
if (length(REMcRdy) == 10) { if (length(REMcRdy) == 10) {
Vec1 <- abs(REMcRdy[, 3]) >= std Vec1 <- abs(REMcRdy[, 3]) >= sd
Vec2 <- abs(REMcRdy[, 4]) >= std Vec2 <- abs(REMcRdy[, 4]) >= sd
Vec3 <- abs(REMcRdy[, 5]) >= std Vec3 <- abs(REMcRdy[, 5]) >= sd
Vec4 <- abs(REMcRdy[, 6]) >= std Vec4 <- abs(REMcRdy[, 6]) >= sd
Vec5 <- abs(REMcRdy[, 7]) >= std Vec5 <- abs(REMcRdy[, 7]) >= sd
Vec6 <- abs(REMcRdy[, 8]) >= std Vec6 <- abs(REMcRdy[, 8]) >= sd
Vec7 <- abs(REMcRdy[, 9]) >= std Vec7 <- abs(REMcRdy[, 9]) >= sd
Vec8 <- abs(REMcRdy[, 10]) >= std Vec8 <- abs(REMcRdy[, 10]) >= sd
bolVec <- Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6 | Vec7 | Vec8 bolVec <- Vec1 | Vec2 | Vec3 | Vec4 | Vec5 | Vec6 | Vec7 | Vec8
REMcRdyGT2 <- REMcRdy[bolVec, 1:2] REMcRdyGT2 <- REMcRdy[bolVec, 1:2]
REMcRdyGT2[, 3:10] <- REMcRdy[bolVec, 3:10] REMcRdyGT2[, 3:10] <- REMcRdy[bolVec, 3:10]
@@ -209,12 +217,12 @@ if (length(REMcRdy) == 10) {
shiftOnlyGT2[, 3:10] <- shiftOnly[bolVec, 3:10] shiftOnlyGT2[, 3:10] <- shiftOnly[bolVec, 3:10]
} }
if (std != 0) { if (sd != 0) {
REMcRdy <- REMcRdyGT2 # [,2:length(REMcRdyGT2)] REMcRdy <- REMcRdyGT2 # [,2:length(REMcRdyGT2)]
shiftOnly <- shiftOnlyGT2 # [,2:length(shiftOnlyGT2)] shiftOnly <- shiftOnlyGT2 # [,2:length(shiftOnlyGT2)]
} }
if (std == 0) { if (sd == 0) {
REMcRdy <- REMcRdy # [,2:length(REMcRdy)] REMcRdy <- REMcRdy # [,2:length(REMcRdy)]
shiftOnly <- shiftOnly # [,2:length(shiftOnly)] shiftOnly <- shiftOnly # [,2:length(shiftOnly)]
} }
@@ -228,7 +236,7 @@ write.csv(shiftOnly, file.path(out_dir, "Shift_only.csv"), row.names = FALSE, qu
#LabelStd <- read.table(file="./parameters.csv",stringsAsFactors = FALSE,sep = ",") #LabelStd <- read.table(file="./parameters.csv",stringsAsFactors = FALSE,sep = ",")
LabelStd <- read.csv(file = study_info, stringsAsFactors = FALSE) LabelStd <- read.csv(file = study_info, stringsAsFactors = FALSE)
print(std) print(sd)
LabelStd[, 4] <- as.numeric(std) LabelStd[, 4] <- as.numeric(sd)
write.csv(LabelStd, file = file.path(out_dir, "parameters.csv"), row.names = FALSE) write.csv(LabelStd, file = file.path(out_dir, "parameters.csv"), row.names = FALSE)
write.csv(LabelStd, file = study_info, row.names = FALSE) write.csv(LabelStd, file = study_info, row.names = FALSE)

View File

@@ -139,7 +139,7 @@ print_help() {
# `--project`, `--module`, `--nomodule`, and `--wrapper` can be passed multiple times or with a comma-separated string # `--project`, `--module`, `--nomodule`, and `--wrapper` can be passed multiple times or with a comma-separated string
# @option -p<value> | --project=<value> One or more projects to analyze, can be passed multiple times or with a comma-separated string # @option -p<value> | --project=<value> One or more projects to analyze, can be passed multiple times or with a comma-separated string
# @option -m<value> | --module=<value> One or more modules to run (default: all), can be passed multiple times or with a comma-separated string # @option -m<value> | --module=<value> One or more modules to run (default: all), can be passed multiple times or with a comma-separated string
# @option -w<value> | --wrapper=<value> Requires two arguments: the name of the wrapper and its arguments, can be passed multiple times # @option -w<value> | --wrapper=<value> One or more wrappers and their arguments to run, can be passed multiple times or with a comma-separated string
# @option -n<value> | --nomodule=<value> One or more modules (default: none) to exclude from the analysis # @option -n<value> | --nomodule=<value> One or more modules (default: none) to exclude from the analysis
# @option --markdown Generate the shdoc markdown file for this program # @option --markdown Generate the shdoc markdown file for this program
# @option -y | --yes | --auto Assume yes answer to all questions (non-interactive mode) # @option -y | --yes | --auto Assume yes answer to all questions (non-interactive mode)
@@ -163,20 +163,22 @@ parse_input() {
case $1 in case $1 in
--project|-p) --project|-p)
shift shift
declare -ga PROJECTS
IFS=',' read -ra PROJECTS <<< "$1" IFS=',' read -ra PROJECTS <<< "$1"
;; ;;
--module|-m) --module|-m)
shift shift
declare -ga MODULES
IFS=',' read -ra MODULES <<< "$1" IFS=',' read -ra MODULES <<< "$1"
;; ;;
--wrapper|-w) --wrapper|-w)
shift shift
IFS=',' read -ra WRAPPERS <<< "$1" declare -ga WRAPPERS
shift
IFS=',' read -ra WRAPPERS <<< "$1" IFS=',' read -ra WRAPPERS <<< "$1"
;; ;;
--nomodule|-n) --nomodule|-n)
shift shift
declare -ga EXCLUDE_MODULES
IFS=',' read -ra EXCLUDE_MODULES <<< "$1" IFS=',' read -ra EXCLUDE_MODULES <<< "$1"
;; ;;
--markdown) --markdown)
@@ -336,6 +338,10 @@ execute() {
# @description Backup one or more files to an incremented .bk file # @description Backup one or more files to an incremented .bk file
# #
# **TODO**
#
# * Make backups hidden by prepending "."?
#
# @exitcode backup iterator max 255 # @exitcode backup iterator max 255
# @internal # @internal
backup() { backup() {
@@ -343,8 +349,8 @@ backup() {
for f in "$@"; do for f in "$@"; do
[[ -e $f ]] || continue [[ -e $f ]] || continue
count=1 count=1
while [[ -f $f.bk.$count ]]; do while [[ -e $f.bk.$count ]]; do
count=$((count++)) ((count++))
done done
echo "Backing up $f to $f.bk.$count" echo "Backing up $f to $f.bk.$count"
debug "rsync -a $f $f.bk.$count" debug "rsync -a $f $f.bk.$count"
@@ -525,7 +531,7 @@ interactive_header() {
echo "" echo ""
# Module selection # Module selection
if [[ ${#MODULES[@]} -eq 0 && ${#EXCLUDE_MODULES[@]} -eq 0 ]]; then if [[ ${#MODULES[@]} -eq 0 && ${#EXCLUDE_MODULES[@]} -eq 0 && ${#WRAPPERS[@]} -eq 0 ]]; then
cat <<-EOF cat <<-EOF
${underline}Enter modules(s) to run${nounderline} ${underline}Enter modules(s) to run${nounderline}
* <Enter> for all * <Enter> for all
@@ -554,7 +560,7 @@ interactive_header() {
# If we're just installing dependencies, skip the rest # If we're just installing dependencies, skip the rest
[[ ${MODULES[*]} == "install_dependencies" ]] && return 0 [[ ${MODULES[*]} == "install_dependencies" ]] && return 0
# Submodule selection # Wrapper selection
if [[ ${#MODULES[@]} -eq 0 && ${#EXCLUDE_MODULES[@]} -eq 0 && ${#WRAPPERS[@]} -eq 0 ]]; then if [[ ${#MODULES[@]} -eq 0 && ${#EXCLUDE_MODULES[@]} -eq 0 && ${#WRAPPERS[@]} -eq 0 ]]; then
while :; do while :; do
cat <<-EOF cat <<-EOF
@@ -1335,7 +1341,7 @@ qhtcp() {
[[ -d $QHTCP_RESULTS_DIR ]] || [[ -d $QHTCP_RESULTS_DIR ]] ||
err "$QHTCP_RESULTS_DIR does not exist, have you run the init_project module?" err "$QHTCP_RESULTS_DIR does not exist, have you run the init_project module?"
# Sets STUDIES_NUMS and STUDIES_DIRS # Sets STUDIES
study_info study_info
choose_easy_results "$EASY_OUT_DIR" choose_easy_results "$EASY_OUT_DIR"
@@ -1348,22 +1354,24 @@ qhtcp() {
# # TODO Add them all to StudiesDataArchive? # # TODO Add them all to StudiesDataArchive?
# # Probably better to always add and remove dupes later since each invocation "counts"? # # Probably better to always add and remove dupes later since each invocation "counts"?
# for f in "${EASY_RESULTS_FILES[@]}"; do # for f in "${EASY_RESULTS_FILES[@]}"; do
# for s in "${STUDIES_NUMS[@]}"; do # for study in "${STUDIES[@]}"; do
# read -r num sd dir <<< "$study"
# # Trying to match old ExpFrontend formatting # # Trying to match old ExpFrontend formatting
# printf "%s\t" \ # printf "%s\t" \
# "${DATE//_/}" "$PROJECT_NAME" "$QHTCP_RESULTS_DIR" "Exp$s" \ # "${DATE//_/}" "$PROJECT_NAME" "$QHTCP_RESULTS_DIR" "Exp$num" \
# "$PROJECT_DATE" "$PROJECT_SCANS_DIR" "$EASY_RESULTS_DIR" "${f##*/}" \ # "$PROJECT_DATE" "$PROJECT_SCANS_DIR" "$EASY_RESULTS_DIR" "${f##*/}" \
# >> "$STUDIES_ARCHIVE_FILE" # >> "$STUDIES_ARCHIVE_FILE"
# done # done
# done # done
# Run R interactions script on all studies # Run R interactions script on all studies
for s in "${STUDIES_NUMS[@]}"; do for study in "${STUDIES[@]}"; do
[[ -d $QHTCP_RESULTS_DIR/Exp$s/zscores ]] || read -r num sd dir <<< "$study"
execute mkdir "$QHTCP_RESULTS_DIR/Exp$s/zscores" [[ -d $dir/zscores ]] ||
[[ -d $QHTCP_RESULTS_DIR/Exp$s/zscores/qc ]] || execute mkdir "$dir/zscores"
execute mkdir "$QHTCP_RESULTS_DIR/Exp$s/zscores/qc" [[ -d $dir/zscores/qc ]] ||
r_interactions "$s" execute mkdir "$dir/zscores/qc"
r_interactions "$num" "$sd"
done \ done \
&& remc \ && remc \
&& gtf \ && gtf \
@@ -1384,13 +1392,12 @@ module remc
remc() { remc() {
debug "Running: ${FUNCNAME[0]}" debug "Running: ${FUNCNAME[0]}"
# Sets STUDIES_NUMS and STUDIES_DIRS # Sets STUDIES
study_info study_info
# If any wrappers fail the rest will not run, this is fundamental to module design # If any wrappers fail the rest will not run, this is fundamental to module design
# Remove leading && to run regardless # Remove leading && to run regardless
r_join_interactions \ r_join_interactions \
"${STUDIES_DIRS[@]}" \
&& java_extract \ && java_extract \
&& r_add_shift_values \ && r_add_shift_values \
&& r_create_heat_maps \ && r_create_heat_maps \
@@ -1453,36 +1460,40 @@ module gta
gta() { gta() {
debug "Running: ${FUNCNAME[0]}" debug "Running: ${FUNCNAME[0]}"
gene_association_sgd="${2:-"$APPS_DIR/r/gene_association.sgd"}" # gene_association_sgd="${2:-"$APPS_DIR/r/gene_association.sgd"}"
gene_ontology_obo="${3:-"$APPS_DIR/r/gene_ontology_edit.obo"}" gene_ontology_obo="${3:-"$APPS_DIR/r/gene_ontology_edit.obo"}"
sgd_terms_tfile="${4:-"$APPS_DIR/r/go_terms.tab"}" sgd_terms_tfile="${4:-"$APPS_DIR/r/go_terms.tab"}"
all_sgd_terms_csv="${5:-"$APPS_DIR/r/All_SGD_GOTerms_for_QHTCPtk.csv"}" all_sgd_terms_csv="${5:-"$APPS_DIR/r/All_SGD_GOTerms_for_QHTCPtk.csv"}"
# TODO This could be wrong, it could be in main results # TODO This could be wrong, it could be in main results
# Sets STUDIES_NUMS and STUDIES_DIRS # Sets STUDIES
study_info study_info
[[ -d $GTA_OUT_DIR ]] && backup "$GTA_OUT_DIR" [[ -d $GTA_OUT_DIR ]] && backup "$GTA_OUT_DIR"
execute mkdir "$GTA_OUT_DIR" execute mkdir "$GTA_OUT_DIR"
# Loop over the array and create pairwise arrays # Loop over the array and create pairwise arrays
for ((i=0; i<${#STUDIES_NUMS[@]}; i++)); do for ((i=0; i<${#STUDIES[@]}; i++)); do
for ((j=i+1; j<${#STUDIES_NUMS[@]}; j++)); do for ((j=i+1; j<${#STUDIES[@]}; j++)); do
pair=("${STUDIES_NUMS[i]}" "${STUDIES_NUMS[j]}") read -r num1 _ _ <<< "${STUDIES[i]}"
read -r num2 _ _ <<< "${STUDIES[j]}"
pair=("$num1" "$num2")
echo "${pair[@]}" echo "${pair[@]}"
done done
done done
# Create unique pairwise combinations of study nums from dir names # Create unique pairwise combinations of study nums from dir names
study_combos=() study_combos=()
for ((i=0; i<${#STUDIES_NUMS[@]}; i++)); do for ((i=0; i<${#STUDIES[@]}; i++)); do
# Loop through the array again # Loop through the array again
for ((j=0; j<${#STUDIES_NUMS[@]}; j++)); do for ((j=0; j<${#STUDIES[@]}; j++)); do
# If the indices are not the same # If the indices are not the same
if [ "$i" != "$j" ]; then if [ "$i" != "$j" ]; then
# Print the unique combination # Print the unique combination
study_combos+=("${STUDIES_NUMS[$i]},${STUDIES_NUMS[$j]}") read -r num1 _ _ <<< "${STUDIES[i]}"
read -r num2 _ _ <<< "${STUDIES[j]}"
study_combos+=("$num1,$num2")
fi fi
done done
done done
@@ -1490,11 +1501,12 @@ gta() {
# The following are three types of studies # The following are three types of studies
# Individual studies # Individual studies
for s in "${STUDIES_NUMS[@]}"; do for study in "${STUDIES[@]}"; do
zscores_file="$QHTCP_RESULTS_DIR/Exp$s/$zscores_file" read -r num _ dir <<< "$study"
zscores_file="$dir/zscores/zscores_interaction.csv"
if [[ -f $zscores_file ]]; then if [[ -f $zscores_file ]]; then
mkdir "$GTA_OUT_DIR/Exp$s" mkdir "$GTA_OUT_DIR/Exp$num"
r_gta "Exp$s" "$zscores_file" r_gta "Exp$num" "$zscores_file"
fi fi
done done
@@ -1507,6 +1519,12 @@ gta() {
# All studies # All studies
# All preceding arguments are required so we can pass multiple studies # All preceding arguments are required so we can pass multiple studies
declare -a nums
for study in "${STUDIES[@]}"; do
read -r num _ _ <<< "$study"
nums+=("$num")
done
r_gta_heatmaps \ r_gta_heatmaps \
"$STUDY_INFO_FILE" \ "$STUDY_INFO_FILE" \
"$gene_ontology_obo" \ "$gene_ontology_obo" \
@@ -1514,7 +1532,7 @@ gta() {
"$all_sgd_terms_csv" \ "$all_sgd_terms_csv" \
"$QHTCP_RESULTS_DIR" \ "$QHTCP_RESULTS_DIR" \
"$QHTCP_RESULTS_DIR/TermSpecificHeatmaps" \ "$QHTCP_RESULTS_DIR/TermSpecificHeatmaps" \
"${STUDIES_NUMS[@]}" "${nums[@]}"
} }
@@ -1714,7 +1732,7 @@ wrapper r_interactions
# @arg $3 string study info file # @arg $3 string study info file
# @arg $4 string SGD_features.tab # @arg $4 string SGD_features.tab
# @arg $5 string easy/results_std.txt # @arg $5 string easy/results_std.txt
# @arg $6 string zscores directory # @arg $6 string output directory
r_interactions() { r_interactions() {
debug "Running: ${FUNCNAME[0]} $*" debug "Running: ${FUNCNAME[0]} $*"
cat <<-EOF cat <<-EOF
@@ -1729,7 +1747,11 @@ r_interactions() {
* Background values are reported in the results sheet and so could also be analyzed there. * Background values are reported in the results sheet and so could also be analyzed there.
EOF EOF
script="$APPS_DIR/r/interactions.R" declare script="$APPS_DIR/r/interactions.R"
declare out_dir="${6:-"$QHTCP_RESULTS_DIR/Exp$1/zscores"}"
[[ -d $out_dir ]] && backup "$out_dir"
mkdir "$out_dir"
execute "$RSCRIPT" "$script" \ execute "$RSCRIPT" "$script" \
"$1" \ "$1" \
@@ -1737,12 +1759,15 @@ r_interactions() {
"${3:-"$STUDY_INFO_FILE"}" \ "${3:-"$STUDY_INFO_FILE"}" \
"${4:-"$APPS_DIR/r/SGD_features.tab"}" \ "${4:-"$APPS_DIR/r/SGD_features.tab"}" \
"${5:-"$EASY_RESULTS_DIR/results_std.txt"}" \ "${5:-"$EASY_RESULTS_DIR/results_std.txt"}" \
"${6:-"$QHTCP_RESULTS_DIR/Exp$1/zscores"}" \ "$out_dir" \
"${@:7}" # future arguments "${@:7}" # future arguments
[[ -f "$out_dir/zscores_interaction.csv" ]] || (echo "$out_dir/zscores_interaction.csv does not exist"; return 1)
} }
wrapper r_join_interactions wrapper r_join_interactions
# shellcheck disable=SC2120
# @description JoinInteractExps3dev.R creates REMcRdy_lm_only.csv and Shift_only.csv # @description JoinInteractExps3dev.R creates REMcRdy_lm_only.csv and Shift_only.csv
# #
# TODO # TODO
@@ -1760,20 +1785,41 @@ wrapper r_join_interactions
# * Shift_only.csv # * Shift_only.csv
# * parameters.csv # * parameters.csv
# #
# @arg $1 string output directory # @arg $1 string output directory (required)
# @arg $2 string sd value (default: 2) # @arg $2 string sd value (default: 2) (required)
# @arg $3 string study info file # @arg $3 string study info file (required)
# @arg $4 array studies (required)
r_join_interactions() { r_join_interactions() {
debug "Running: ${FUNCNAME[0]} $*" debug "Running: ${FUNCNAME[0]} $*"
script="$APPS_DIR/r/joinInteractExps.R" declare script="$APPS_DIR/r/joinInteractExps.R"
declare -a dirs
declare -a out_files=(
"${1:-$QHTCP_RESULTS_DIR}/REMcRdy_lm_only.csv"
"${1:-$QHTCP_RESULTS_DIR}/Shift_only.csv"
"${1:-$QHTCP_RESULTS_DIR}/parameters.csv"
)
((DEBUG)) && declare -p
backup "${out_files[@]}"
# If user provides study dirs, use those
if [[ $# -gt 3 ]]; then
dirs=("${@:4}")
else
study_info
for study in "${STUDIES[@]}"; do
read -r _ _ dir <<< "$study"
dirs+=("$dir")
done
fi
execute "$RSCRIPT" "$script" \ execute "$RSCRIPT" "$script" \
"${1:-$QHTCP_RESULTS_DIR}" \ "${1:-$QHTCP_RESULTS_DIR}" \
"${2:-2}" \ "${2:-2}" \
"${3:-$STUDY_INFO_FILE}" \ "${3:-$STUDY_INFO_FILE}" \
"${@:4:-${STUDIES_DIRS[@]}}" "${dirs[@]}"
local out_files=("$1/REMcRdy_lm_only.csv" "$1/Shift_only.csv" "$1/parameters.csv")
for f in "${out_files[@]}"; do for f in "${out_files[@]}"; do
[[ -f $f ]] || (echo "$f does not exist"; return 1) [[ -f $f ]] || (echo "$f does not exist"; return 1)
done done
@@ -1816,6 +1862,9 @@ java_extract() {
"${2:-"$QHTCP_RESULTS_DIR/REMcRdy_lm_only.csv"}" "${2:-"$QHTCP_RESULTS_DIR/REMcRdy_lm_only.csv"}"
"${3:-"$APPS_DIR/java/GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab"}" "${3:-"$APPS_DIR/java/GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab"}"
"${4:-"$APPS_DIR/java/ORF_List_Without_DAmPs.txt"}" "${4:-"$APPS_DIR/java/ORF_List_Without_DAmPs.txt"}"
1
true
true
) )
debug "pushd && ${java_cmd[*]} && popd" debug "pushd && ${java_cmd[*]} && popd"
@@ -2021,8 +2070,7 @@ r_compile_gtf() {
# #
# @exitcode 0 If one or more studies found # @exitcode 0 If one or more studies found
# @exitcode 1 If no studies found # @exitcode 1 If no studies found
# @set STUDIES_NUMS array contains Exp numbers # @set STUDIES array contains array of "Exp# sd ExpDir"
# @set STUDIES_DIRS array contains Exp directories
study_info() { study_info() {
debug "Running: ${FUNCNAME[0]}" debug "Running: ${FUNCNAME[0]}"
@@ -2116,26 +2164,28 @@ study_info() {
fi fi
# Read study info file # Read study info file
while IFS=',' read -r col1 _; do # split on comma, get Exp # from 1st column declare -ga STUDIES
STUDIES_NUMS+=("$col1") while IFS=',' read -r num _ sd _; do
STUDIES+=("$num $sd $QHTCP_RESULTS_DIR/Exp$num")
done < <(tail -n +2 "$STUDY_INFO_FILE") # skip header done < <(tail -n +2 "$STUDY_INFO_FILE") # skip header
# Initialize missing Exp dirs # Initialize missing Exp dirs
STUDIES_DIRS=() for study in "${STUDIES[@]}"; do
for s in "${STUDIES_NUMS[@]}"; do read -r _ _ dir <<< "$study"
study_dir="$QHTCP_RESULTS_DIR/Exp$s" [[ -d $dir ]] || mkdir "$dir"
STUDIES_DIRS+=("$study_dir")
[[ -d $study_dir ]] || mkdir "$study_dir"
# We don't need a template anymore?
# if ! rsync --archive "$STUDY_TEMPLATE_DIR" "$STUDY_DIR"; then
# err "Could not copy $STUDY_TEMPLATE_DIR template to $STUDY_DIR"
# continue
# fi
done done
# # We don't need a template anymore?
# # if ! rsync --archive "$STUDY_TEMPLATE_DIR" "$STUDY_DIR"; then
# # err "Could not copy $STUDY_TEMPLATE_DIR template to $STUDY_DIR"
# # continue
# # fi
# done
((DEBUG)) && declare -p STUDIES
# Return true if at least one study was found # Return true if at least one study was found
[[ ${#STUDIES_NUMS[@]} -gt 0 ]] [[ ${#STUDIES[@]} -gt 0 ]]
} }
@@ -2287,6 +2337,8 @@ main() {
parse_input "$@" # parse arguments with getopt parse_input "$@" # parse arguments with getopt
# ((DEBUG)) && declare -p
interactive_header "$@" interactive_header "$@"
# # Prompt user for the PROJECT if we still don't have one # # Prompt user for the PROJECT if we still don't have one
@@ -2345,14 +2397,8 @@ main() {
declare -gx GTA_OUT_DIR="$QHTCP_RESULTS_DIR/gta" declare -gx GTA_OUT_DIR="$QHTCP_RESULTS_DIR/gta"
declare -gx GTF_OUT_DIR="$QHTCP_RESULTS_DIR/gtf" declare -gx GTF_OUT_DIR="$QHTCP_RESULTS_DIR/gtf"
declare -gx R_LIBS_USER=${R_LIBS_USER:-"$HOME/R/$SCRIPT_NAME"} declare -gx R_LIBS_USER=${R_LIBS_USER:-"$HOME/R/$SCRIPT_NAME"}
if ((DEBUG)); then
echo "Debug:" # ((DEBUG)) && declare -p
declare -p SCANS_DIR OUT_DIR TEMPLATES_DIR APPS_DIR \
PROJECTS PROJECT_NAME \
PROJECT_SCANS_DIR PROJECT_DATE PROJECT_SUFFIX PROJECT_USER \
STUDIES_ARCHIVE_FILE QHTCP_RESULTS_DIR QHTCP_TEMPLATE_DIR \
STUDY_INFO_FILE EASY_RESULTS_DIR R_LIBS_USER
fi
debug "Active modules: ${MODULES[*]}" debug "Active modules: ${MODULES[*]}"
debug "Active wrappers and their args: ${WRAPPERS[*]}" debug "Active wrappers and their args: ${WRAPPERS[*]}"
@@ -2365,21 +2411,19 @@ main() {
done done
# Run selected wrappers # Run selected wrappers
for i in "${!WRAPPERS[@]}"; do for wrapper in "${WRAPPERS[@]}"; do
IFS=',' read -ra args <<< "${WRAPPERS[$((i+1))]}" # load the command args IFS=',' read -ra args <<< "$wrapper" # load the command args
if ask "Run ${WRAPPERS[i]} wrapper with args ${args[*]}?"; then if ask "Run ${args[0]} wrapper with args ${args[*]:1}?"; then
"${WRAPPERS[i]}" "${args[@]}" || return 1 "${args[0]}" "${args[@]:1}" || return 1
fi fi
continue 2 # skip the command string
done done
done done
cat <<-EOF [[ ${#MODULES[@]} -gt 0 ]] && echo "Successfully ran module(s): ${MODULES[*]}"
Successfully ran module(s): ${MODULES[*]} [[ ${#WRAPPERS[@]} -gt 0 ]] && echo "Successfully ran wrapper(s): ${WRAPPERS[*]}"
And wrapper(s): ${WRAPPERS[*]} [[ ${#PROJECTS[@]} -gt 0 ]] && echo "On project(s): ${PROJECTS[*]}"
On project(s): ${PROJECTS[*]}
EOF unset MODULES WRAPPERS EXCLUDE_MODULES STUDIES SET_STUDIES YES
unset MODULES WRAPPERS EXCLUDE_MODULES STUDIES_NUMS STUDIES_DIRS SET_STUDIES YES
} }
# (Safe) main loop # (Safe) main loop