Rollup before java debug
This commit is contained in:
@@ -26,8 +26,8 @@ args <- parse_arguments()
|
|||||||
|
|
||||||
# Create an array for the zscores files
|
# Create an array for the zscores files
|
||||||
get_zscores_files <- function(dirs) {
|
get_zscores_files <- function(dirs) {
|
||||||
files <- sapply(dirs, function(study) {
|
files <- sapply(dirs, function(exp) {
|
||||||
file_path <- file.path(study, "zscores", "zscores_interaction.csv")
|
file_path <- file.path(exp, "zscores", "zscores_interaction.csv")
|
||||||
if (file.exists(file_path)) file_path else NULL
|
if (file.exists(file_path)) file_path else NULL
|
||||||
}, simplify = TRUE, USE.NAMES = FALSE)
|
}, simplify = TRUE, USE.NAMES = FALSE)
|
||||||
|
|
||||||
|
|||||||
113
workflow/apps/r/join_interaction_zscores.R
Normal file
113
workflow/apps/r/join_interaction_zscores.R
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
suppressMessages({
|
||||||
|
library("dplyr")
|
||||||
|
library("data.table")
|
||||||
|
library("readr")
|
||||||
|
library("stringr")
|
||||||
|
})
|
||||||
|
|
||||||
|
# Function to parse arguments
|
||||||
|
parse_arguments <- function() {
|
||||||
|
if (interactive()) {
|
||||||
|
args <- c(
|
||||||
|
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD",
|
||||||
|
3, # sd value
|
||||||
|
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp1",
|
||||||
|
"Experiment 1: Doxo versus HLD",
|
||||||
|
"/home/bryan/documents/develop/scripts/hartmanlab/workflow/out/20240116_jhartman2_DoxoHLD/20240822_jhartman2_DoxoHLD/exp2",
|
||||||
|
"Experiment 2: HLD versus Doxo"
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
args <- commandArgs(trailingOnly = TRUE)
|
||||||
|
}
|
||||||
|
|
||||||
|
out_dir <- normalizePath(args[1], mustWork = FALSE)
|
||||||
|
sd <- as.numeric(args[2])
|
||||||
|
paths <- normalizePath(args[seq(3, length(args), by = 2)], mustWork = FALSE)
|
||||||
|
names <- args[seq(4, length(args), by = 2)]
|
||||||
|
experiments <- setNames(paths, names)
|
||||||
|
|
||||||
|
list(
|
||||||
|
out_dir = out_dir,
|
||||||
|
sd = sd,
|
||||||
|
experiments = experiments
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
args <- parse_arguments()
|
||||||
|
|
||||||
|
# Ensure main output directory exists
|
||||||
|
dir.create(args$out_dir, showWarnings = FALSE, recursive = TRUE)
|
||||||
|
|
||||||
|
# Function to read and combine z-score interaction files
|
||||||
|
combine_zscores <- function(experiments, out_dir) {
|
||||||
|
combined_data <- lapply(names(experiments), function(exp_name) {
|
||||||
|
exp_dir <- experiments[[exp_name]]
|
||||||
|
zscore_file <- file.path(exp_dir, "zscores", "zscores_interaction.csv")
|
||||||
|
|
||||||
|
if (!file.exists(zscore_file)) {
|
||||||
|
stop("Z-score file does not exist for ", exp_name, " at ", zscore_file)
|
||||||
|
}
|
||||||
|
|
||||||
|
message("Reading z-score file for ", exp_name, " from ", zscore_file)
|
||||||
|
data <- fread(zscore_file)
|
||||||
|
data$Experiment <- exp_name
|
||||||
|
|
||||||
|
return(data)
|
||||||
|
}) %>%
|
||||||
|
bind_rows()
|
||||||
|
|
||||||
|
combined_output_file <- file.path(out_dir, "combined_zscores.csv")
|
||||||
|
fwrite(combined_data, combined_output_file, row.names = FALSE)
|
||||||
|
|
||||||
|
message("Combined z-score file saved to: ", combined_output_file)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Function to read and combine summary statistics files
|
||||||
|
combine_summary_stats <- function(experiments, out_dir) {
|
||||||
|
combined_stats <- lapply(names(experiments), function(exp_name) {
|
||||||
|
exp_dir <- experiments[[exp_name]]
|
||||||
|
summary_file <- file.path(exp_dir, "zscores", "summary_stats_all_strains.csv")
|
||||||
|
|
||||||
|
if (!file.exists(summary_file)) {
|
||||||
|
stop("Summary stats file does not exist for ", exp_name, " at ", summary_file)
|
||||||
|
}
|
||||||
|
|
||||||
|
message("Reading summary stats file for ", exp_name, " from ", summary_file)
|
||||||
|
data <- fread(summary_file)
|
||||||
|
data$Experiment <- exp_name
|
||||||
|
|
||||||
|
return(data)
|
||||||
|
}) %>%
|
||||||
|
bind_rows()
|
||||||
|
|
||||||
|
combined_output_file <- file.path(out_dir, "combined_summary_stats.csv")
|
||||||
|
fwrite(combined_stats, combined_output_file, row.names = FALSE)
|
||||||
|
|
||||||
|
message("Combined summary stats file saved to: ", combined_output_file)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Function to generate final summary report
|
||||||
|
generate_final_report <- function(out_dir) {
|
||||||
|
combined_zscores <- file.path(out_dir, "combined_zscores.csv")
|
||||||
|
combined_stats <- file.path(out_dir, "combined_summary_stats.csv")
|
||||||
|
|
||||||
|
if (!file.exists(combined_zscores) || !file.exists(combined_stats)) {
|
||||||
|
stop("Combined z-scores or summary stats files do not exist.")
|
||||||
|
}
|
||||||
|
|
||||||
|
zscores_data <- fread(combined_zscores)
|
||||||
|
stats_data <- fread(combined_stats)
|
||||||
|
|
||||||
|
message("Merging z-score and summary stats data...")
|
||||||
|
final_report <- merge(zscores_data, stats_data, by = c("OrfRep", "Experiment"), all = TRUE, allow.cartesian = TRUE)
|
||||||
|
|
||||||
|
final_report_file <- file.path(out_dir, "final_combined_report.csv")
|
||||||
|
fwrite(final_report, final_report_file, row.names = FALSE)
|
||||||
|
|
||||||
|
message("Final combined report saved to: ", final_report_file)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Process all experiments and generate outputs
|
||||||
|
combine_zscores(args$experiments, args$out_dir)
|
||||||
|
combine_summary_stats(args$experiments, args$out_dir)
|
||||||
|
generate_final_report(args$out_dir)
|
||||||
@@ -1296,6 +1296,7 @@ qhtcp() {
|
|||||||
|
|
||||||
# Run R interactions script on all studies
|
# Run R interactions script on all studies
|
||||||
calculate_interaction_zscores \
|
calculate_interaction_zscores \
|
||||||
|
&& join_interaction_zscores \
|
||||||
&& remc \
|
&& remc \
|
||||||
&& gtf \
|
&& gtf \
|
||||||
&& gta
|
&& gta
|
||||||
@@ -1316,7 +1317,6 @@ remc() {
|
|||||||
|
|
||||||
# If any wrappers fail the rest will not run, this is fundamental to module design
|
# If any wrappers fail the rest will not run, this is fundamental to module design
|
||||||
# Remove leading && to run regardless
|
# Remove leading && to run regardless
|
||||||
#r_join_interactions \
|
|
||||||
java_extract \
|
java_extract \
|
||||||
&& r_add_shift_values \
|
&& r_add_shift_values \
|
||||||
&& r_create_heat_maps \
|
&& r_create_heat_maps \
|
||||||
@@ -1522,7 +1522,50 @@ calculate_interaction_zscores() {
|
|||||||
"${EXP_PATHS_AND_NAMES[@]}"
|
"${EXP_PATHS_AND_NAMES[@]}"
|
||||||
|
|
||||||
[[ -f "$out_path/zscores_interaction.csv" ]] || (echo "$out_path/zscores_interaction.csv does not exist"; return 1)
|
[[ -f "$out_path/zscores_interaction.csv" ]] || (echo "$out_path/zscores_interaction.csv does not exist"; return 1)
|
||||||
((DEBUG)) && declare -p && exit # when the going gets rough
|
}
|
||||||
|
|
||||||
|
|
||||||
|
wrapper join_interaction_zscores
|
||||||
|
# shellcheck disable=SC2120
|
||||||
|
# @description JoinInteractExps3dev.R creates REMcRdy_lm_only.csv and Shift_only.csv
|
||||||
|
#
|
||||||
|
# TODO
|
||||||
|
#
|
||||||
|
# * Needs more loops to reduce verbosity
|
||||||
|
#
|
||||||
|
# INPUT
|
||||||
|
#
|
||||||
|
# * /out/PROJECT/STUDY/exp#/zscores/zscores_interaction.csv
|
||||||
|
#
|
||||||
|
# OUTPUT
|
||||||
|
#
|
||||||
|
# * combined_zscores.csv (REMcRdy_lm_only.csv)
|
||||||
|
# * combined_summary_stats (Shift_only.csv)
|
||||||
|
# * final_combined_report (parameters.csv)
|
||||||
|
#
|
||||||
|
# @arg $1 string output directory
|
||||||
|
# @arg $2 string sd value (default: 2)
|
||||||
|
# @arg $3 array pairs of experiment paths and names
|
||||||
|
join_interaction_zscores() {
|
||||||
|
debug "Running: ${FUNCNAME[0]} $*"
|
||||||
|
declare script="$APPS_DIR/r/join_interaction_zscores.R"
|
||||||
|
declare -a out_files=(
|
||||||
|
"${1:-$STUDY_RESULTS_DIR}/combined_zscores.csv"
|
||||||
|
"${1:-$STUDY_RESULTS_DIR}/combined_summary_stats.csv"
|
||||||
|
"${1:-$STUDY_RESULTS_DIR}/final_combined_report.csv"
|
||||||
|
)
|
||||||
|
|
||||||
|
# ((DEBUG)) && declare -p # when the going gets tough
|
||||||
|
|
||||||
|
execute "$RSCRIPT" "$script" \
|
||||||
|
"${1:-$STUDY_RESULTS_DIR}" \
|
||||||
|
"${2:-2}" \
|
||||||
|
"${@:3:}" \
|
||||||
|
"${EXP_PATHS_AND_NAMES[@]}"
|
||||||
|
|
||||||
|
for f in "${out_files[@]}"; do
|
||||||
|
[[ -f $f ]] || (echo "$f does not exist"; return 1)
|
||||||
|
done
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -1671,52 +1714,6 @@ r_gta_heatmaps() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# wrapper r_join_interactions
|
|
||||||
# # shellcheck disable=SC2120
|
|
||||||
# # @description JoinInteractExps3dev.R creates REMcRdy_lm_only.csv and Shift_only.csv
|
|
||||||
# #
|
|
||||||
# # TODO
|
|
||||||
# #
|
|
||||||
# # * Needs more loops to reduce verbosity
|
|
||||||
# #
|
|
||||||
# # INPUT
|
|
||||||
# #
|
|
||||||
# # * study info file
|
|
||||||
# # * /out/PROJECT/STUDY/exp#/zscores/zscores_interaction.csv
|
|
||||||
# #
|
|
||||||
# # OUTPUT
|
|
||||||
# #
|
|
||||||
# # * REMcRdy_lm_only.csv
|
|
||||||
# # * Shift_only.csv
|
|
||||||
# # * parameters.csv
|
|
||||||
# #
|
|
||||||
# # @arg $1 string output directory (required)
|
|
||||||
# # @arg $2 string sd value (default: 2) (required)
|
|
||||||
# # @arg $3 string study info file (required)
|
|
||||||
# # @arg $4 array studies (required)
|
|
||||||
# r_join_interactions() {
|
|
||||||
# debug "Running: ${FUNCNAME[0]} $*"
|
|
||||||
# declare script="$APPS_DIR/r/joinInteractExps.R"
|
|
||||||
# declare -a out_files=(
|
|
||||||
# "${1:-$STUDY_RESULTS_DIR}/REMcRdy_lm_only.csv"
|
|
||||||
# "${1:-$STUDY_RESULTS_DIR}/Shift_only.csv"
|
|
||||||
# "${1:-$STUDY_RESULTS_DIR}/parameters.csv"
|
|
||||||
# )
|
|
||||||
|
|
||||||
# # ((DEBUG)) && declare -p # when the going gets tough
|
|
||||||
|
|
||||||
# execute "$RSCRIPT" "$script" \
|
|
||||||
# "${1:-$STUDY_RESULTS_DIR}" \
|
|
||||||
# "${2:-2}" \
|
|
||||||
# "${3:-$STUDY_INFO_FILE}" \
|
|
||||||
# "${@:4:-${EXP_PATHS[@]}}"
|
|
||||||
|
|
||||||
# for f in "${out_files[@]}"; do
|
|
||||||
# [[ -f $f ]] || (echo "$f does not exist"; return 1)
|
|
||||||
# done
|
|
||||||
# }
|
|
||||||
|
|
||||||
|
|
||||||
wrapper java_extract
|
wrapper java_extract
|
||||||
# shellcheck disable=SC2120
|
# shellcheck disable=SC2120
|
||||||
# @description Jingyu's REMc java utility
|
# @description Jingyu's REMc java utility
|
||||||
@@ -1727,14 +1724,14 @@ wrapper java_extract
|
|||||||
#
|
#
|
||||||
# INPUT
|
# INPUT
|
||||||
#
|
#
|
||||||
# * REMcRdy_lm_only.csv
|
# * study_dir/combined_zscores.csv (REMcRdy_lm_only.csv)
|
||||||
#
|
#
|
||||||
# OUTPUT
|
# OUTPUT
|
||||||
#
|
#
|
||||||
# * REMcRdy_lm_only.csv-finalTable.csv
|
# * study_dir/combined_zscores_final.csv (REMcRdy_lm_only.csv-finalTable.csv)
|
||||||
#
|
#
|
||||||
# @arg $1 string output directory
|
# @arg $1 string output directory
|
||||||
# @arg $2 string REMcRdy_lm_only.csv
|
# @arg $2 string combined_zscores.csv
|
||||||
# @arg $3 string GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab
|
# @arg $3 string GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab
|
||||||
# @arg $4 string ORF_List_Without_DAmPs.txt
|
# @arg $4 string ORF_List_Without_DAmPs.txt
|
||||||
# @exitcode 0 if expected output file exists
|
# @exitcode 0 if expected output file exists
|
||||||
@@ -1743,14 +1740,14 @@ java_extract() {
|
|||||||
debug "Running: ${FUNCNAME[0]}"
|
debug "Running: ${FUNCNAME[0]}"
|
||||||
classpath="$APPS_DIR/java/weka-clustering/weka-clustering.jar"
|
classpath="$APPS_DIR/java/weka-clustering/weka-clustering.jar"
|
||||||
|
|
||||||
output_file="${1:-$STUDY_RESULTS_DIR}/REMcRdy_lm_only.csv-finalTable.csv"
|
output_file="${1:-$STUDY_RESULTS_DIR}/combined_zscores_final.csv"
|
||||||
|
|
||||||
[[ -f $output_file ]] && backup "$output_file"
|
[[ -f $output_file ]] && backup "$output_file"
|
||||||
|
|
||||||
java_cmd=(
|
java_cmd=(
|
||||||
"$JAVA" -Xms512m -Xmx2048m -Dfile.encoding=UTF-8
|
"$JAVA" -Xms512m -Xmx2048m -Dfile.encoding=UTF-8
|
||||||
-classpath "$classpath" ExecMain
|
-classpath "$classpath" ExecMain
|
||||||
"${2:-"$STUDY_RESULTS_DIR/REMcRdy_lm_only.csv"}"
|
"${2:-"$STUDY_RESULTS_DIR/combined_zscores.csv"}"
|
||||||
"${3:-"$APPS_DIR/java/GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab"}"
|
"${3:-"$APPS_DIR/java/GeneByGOAttributeMatrix_nofiltering-2009Dec07.tab"}"
|
||||||
"${4:-"$APPS_DIR/java/ORF_List_Without_DAmPs.txt"}"
|
"${4:-"$APPS_DIR/java/ORF_List_Without_DAmPs.txt"}"
|
||||||
1
|
1
|
||||||
@@ -2297,9 +2294,9 @@ main() {
|
|||||||
|
|
||||||
# Sanitize wrappers
|
# Sanitize wrappers
|
||||||
for i in "${!WRAPPERS[@]}"; do
|
for i in "${!WRAPPERS[@]}"; do
|
||||||
IFS=',' read -ra args <<< "$wrapper" # load the wrapper and args
|
IFS=',' read -ra args <<< "${WRAPPERS[$i]}" # load the wrapper and args
|
||||||
if ! [[ " ${ALL_WRAPPERS[*]} " =~ [[:space:]]${args[0]}[[:space:]] ]]; then
|
if ! [[ " ${ALL_WRAPPERS[*]} " =~ [[:space:]]${args[0]}[[:space:]] ]]; then
|
||||||
echo "Wrapper ${WRAPPERS[$i]} is not available, removing"
|
echo "Wrapper ${args[0]} is not available, removing"
|
||||||
unset "WRAPPERS[$i]"
|
unset "WRAPPERS[$i]"
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|||||||
Reference in New Issue
Block a user