Modularize joinInteractExps.R

This commit is contained in:
2024-07-27 23:53:54 -04:00
parent 66725024b8
commit c8eba4efd4
118 changed files with 291 additions and 2593503 deletions

View File

@@ -380,7 +380,6 @@ init_project() {
ask "Initialize a project at $SCANS_DIR?" || return 1
# Write skeleton files in csv
# If we have to convert to xlsx later, so be it
cat <<-EOF > "$DRUG_MEDIA_FILE"
@@ -398,9 +397,6 @@ init_project() {
# TODO here we'll copy scan from robot but for now let's pause and wait for transfer
read -r -p "Hit <Enter> to continue: "
}
@@ -673,11 +669,12 @@ module qhtcp
qhtcp() {
debug "Running: ${FUNCNAME[0]}"
QHTCP_BASE_DIR="/mnt/data/StudiesQHTCP"
QHTCP_PROJECT_DIR="$QHTCP_BASE_DIR/$PROJECT"
CODE_DIR="$QHTCP_PROJECT_DIR/Code"
STUDIES_ARCHIVE_FILE="$QHTCP_BASE_DIR/StudiesDataArchive.txt"
STUDY_INFO_FILE="$CODE_DIR/StudyInfo.csv"
OUT_DIR="/mnt/data/out/qhtcp"
STUDIES_ARCHIVE_FILE="$OUT_DIR/StudiesDataArchive.txt"
QHTCP_PROJECT_DIR="$OUT_DIR/$PROJECT"
APPS_DIR="$QHTCP_PROJECT_DIR/apps"
CODE_DIR="$QHTCP_PROJECT_DIR/Code" # legacy location, keeping until can port each script to APPS
STUDY_INFO_FILE="$QHTCP_PROJECT_DIR/StudyInfo.csv"
if [[ -d $QHTCP_PROJECT_DIR ]]; then
echo "A project already exists at $QHTCP_PROJECT_DIR"
@@ -735,8 +732,10 @@ qhtcp() {
fi
# Initialize missing dirs
STUDIES_DIRS=()
for s in "${STUDIES_NUMS[@]}"; do
STUDY_DIR="$QHTCP_PROJECT_DIR/Exp$s"
STUDIES_DIRS+=("$STUDY_DIR")
if ! [[ -d $STUDY_DIR ]]; then
if ! rsync --archive "$STUDY_TEMPLATE_DIR" "$STUDY_DIR"; then
err "Could not copy $STUDY_TEMPLATE_DIR template to $STUDY_DIR"
@@ -788,18 +787,17 @@ qhtcp() {
# Run R interactions script on all studies
for s in "${STUDIES_NUMS[@]}"; do
STUDY_DIR="$QHTCP_PROJECT_DIR/Exp$s"
pushd "$STUDY_DIR" || return 1
r_interactions \
"$STUDY_DIR"\
"$STUDY_INFO_FILE"\
"/ZScores/" \
"$STUDY_DIR" \
"$STUDY_INFO_FILE" \
"$STUDY_DIR/ZScores/" \
"$CODE_DIR/SGD_features.tab" \
5
popd || return 1
done
# Run remc as part of the QHTCP process
remc
# pass all the study directories to it so the scripts have all the paths
remc "$STUDY_INFO_FILE" "${STUDIES_DIRS[@]}"
}
@@ -814,7 +812,7 @@ module remc
# * Use arguments to pass configuration variables
# * This allows us to abstract the program away in script-run-workflow and treat it like a module
remc() {
debug "Running: ${FUNCNAME[0]}"
debug "Running: ${FUNCNAME[0]}" "$@"
# Enter REMc directory to run the scripts there
pushd "$QHTCP_PROJECT_DIR/REMc" || return 1
@@ -823,7 +821,7 @@ remc() {
# If any modules fail the rest will not run, this is fundamental to module design
# Remove trailing && to run regardless
# TODO can this be
r_join_interact &&
r_join_interactions "$QHTCP_PROJECT_DIR/1-join_interactions" 2 "$1" "${@:2}" &&
java_extract &&
r_add_shift_values &&
r_heat_maps_zscores &&
@@ -1131,22 +1129,25 @@ r_interactions() {
This is most often "trial and error", meaning there is a 'Frequency_Delta_Background.pdf' report in the /Exp_/ZScores/QC/ folder to evaluate whether the chosen value was suitable (and if not the analysis can simply be rerun with a more optimal choice). In general, err on the high side, with BSD of 10 or 12…. One can also use EZview to examine the raw images and individual cultures potentially included/excluded as a consequence of the selected value. Background values are reported in the results sheet and so could also be analyzed there..
EOF
script="Z_InteractionTemplate.R"
script="$APPS_DIR/r/interactions.R"
debug "$RSCRIPT $script" "$@"
"$RSCRIPT" "$script" "$@"
}
submodule r_join_interact
submodule r_join_interactions
# @description joinInteractExps.R creates REMcRdy_lm_only.csv, Shift_only.csv, and parameters.csv
r_join_interact() {
debug "Running: ${FUNCNAME[0]}"
debug "$RSCRIPT JoinInteractExps3dev.R"
"$RSCRIPT" JoinInteractExps3dev.R
out_file="REMcRdy_lm_only.csv"
out_file2="Shift_only.csv"
for f in "$out_file" "$out_file2"; do
# @arg $1 string The output directory
# @arg $2 string The sd value
# @arg $3 string The studyInfo file
# @arg $@ string All arguments (including any after $3) are forwarded unchanged to the R script
# @exitcode 0 If every expected output file exists after the script has run
# @exitcode 1 If one or more expected output files are missing
r_join_interactions() {
  debug "Running: ${FUNCNAME[0]}" "$@"
  local script f
  local missing=0
  script="$APPS_DIR/r/joinInteractExps.R"
  debug "$RSCRIPT $script"
  "$RSCRIPT" "$script" "$@"

  # The expected outputs are looked up relative to the current working directory
  local out_files=("REMcRdy_lm_only.csv" "Shift_only.csv" "parameters.csv")
  for f in "${out_files[@]}"; do
    # Record the failure in the current shell. A `( ...; return 1 )` subshell
    # would only exit the subshell, so the loop would keep going and the
    # function's status would depend solely on the last file checked.
    if [[ ! -f $f ]]; then
      echo "$f does not exist"
      missing=1
    fi
  done
  return "$missing"
}
@@ -1427,7 +1428,7 @@ main() {
# Global directory variables
TEMPLATES_DIR="$SCRIPT_DIR/templates"
QHTCP_TEMPLATE_DIR="$TEMPLATES_DIR/qhtcp"
STUDY_TEMPLATE_DIR="$TEMPLATES_DIR/demo"
STUDY_TEMPLATE_DIR="$QHTCP_TEMPLATE_DIR/ExpTemplate"
EASY_DIR="$SCRIPT_DIR/apps/matlab/easy"
DATE="$(date +%Y%m%d)" # change in EASYConsole.m to match
SCANS="${SCANS:-"./scans":-"/mnt/data/ExpJobs"}"
@@ -1501,7 +1502,6 @@ main() {
PROJECT_DATE="${PROJECT%"${PROJECT#????????}"}" # e.g. 20240723
PROJECT_SUFFIX="${PROJECT#????????_*_}"
PROJECT_USER="${PROJECT#????????_}"; PROJECT_USER="${PROJECT_USER%%_*}"
PROJECT_NAME="${PROJECT_DATE}_${PROJECT_USER}_${PROJECT_SUFFIX}"
# Run selected modules
for m in "${MODULES[@]}"; do
ask "Run $m" && "$m"